The data set provided by Trendyol consists of 13 columns and 4331 rows. There are 9 products, and for each there are 12 attributes for each day between 25-05-2020 and 31-05-2021.
Type of event_date transformed to “Date” and “Day”, “Month” and “Year” columns are added to the data set.
For each of 9 products a separate data set is constructed and observations ordered from old to new.
library(data.table)
library(lubridate)
library(ggplot2)
library(forecast)
library(urca)
# NOTE(review): hard-coded absolute path — only works on the author's machine.
setwd("C:/Users/Pınar YILDIRIM/Downloads")
#Data download
# Read the raw Trendyol data and convert to data.table for by-reference updates.
my_data= read.csv("ProjectRawData.csv")
my_data=as.data.table(my_data)
# Parse event_date (year-month-day) and derive weekday/month labels and year.
my_data$event_date=ymd(my_data$event_date)
my_data[,"Day" := lubridate::wday(event_date, label=TRUE)]
my_data[,"Month" := lubridate::month(event_date, label=TRUE)]
my_data[,"Year" := lubridate::year(event_date)]
#PRODUCTS
# Split my_data into one table per product (product1..product9), ordered from
# old to new, and tag the matching rows of my_data with a short product label.
# The subset is taken BEFORE the label is assigned in each iteration, which
# reproduces the original script exactly (product1 has no "product" column;
# product2..product9 carry it as all-NA at extraction time).
product_ids <- c(31515569, 32737302, 32939029, 4066298, 48740784,
                 6676673, 7061886, 73318567, 85004)
for (i in seq_along(product_ids)) {
  id <- product_ids[i]
  #order observations from old to new
  prod <- my_data[product_content_id == id][order(event_date)]
  assign(paste0("product", i), prod)
  my_data[product_content_id == id, product := as.character(i)]
}
# Accuracy summary for a forecast against actuals.
# BUG FIX: the original did `error=complete.cases(error)`, which replaced the
# numeric error vector with a logical vector and corrupted every metric below.
# NA pairs are now dropped from both series before the errors are computed.
# Returns a one-row data.frame: n, mean, FBias, MAPE, RMSE, MAD, MADP, WMAPE.
accu=function(actual,forecast){
# keep only complete (non-NA) actual/forecast pairs
ok=complete.cases(actual,forecast)
actual=actual[ok]
forecast=forecast[ok]
n=length(actual)
error=actual-forecast
mean=mean(actual)
#sd=sd(actual)
#CV=sd/mean
FBias=sum(error)/sum(actual)
MAPE=sum(abs(error/actual))/n
RMSE=sqrt(sum(error^2)/n)
MAD=sum(abs(error))/n
MADP=sum(abs(error))/sum(abs(actual))
WMAPE=MAD/mean
l=data.frame(n,mean,FBias,MAPE,RMSE,MAD,MADP,WMAPE)
return(l)
}
ID of product 1 is “31515569” and it is a legging by “TRENDYOLMILLA”.
# Raw daily sold counts for Product 1 over the full observation window.
(ggplot(product1,aes(x=event_date,y=sold_count))+
geom_line()+
theme_minimal()+
labs(title="Product 1",y="Sold Count", x="Event Date"))
The first plot shows the average of products sold for each weekday.
# Pooled average sold count per weekday; := adds wday_mean to product1 by reference.
product1[,wday_mean := mean(sold_count), by=.(Day)]
(ggplot(product1,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 1",y="Average Sold Count by Weekdays", x="Event Date"))
Second plot shows the difference of average sold counts for each weekday in 2020 and 2021.
# Average sold count per weekday WITHIN each year, so 2020 and 2021 can be
# compared as the text describes. BUG FIX: the original grouped by Day only,
# making this plot identical to the previous one (cf. the product2 version,
# which groups by year(event_date) and Day).
product1[,wday_mean := mean(sold_count), by=.(year(event_date),Day)]
(ggplot(product1,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 1",y="Average Sold Count by Weekdays", x="Event Date"))
Third plot shows the average sold counts for each weekday for each month. It is possible to say that each weekday has different effects depending on the month and year.
# Average sold count per weekday within each (year, month); overwrites wday_mean.
product1[,wday_mean := mean(sold_count), by=.(year(event_date),Day,Month)]
(ggplot(product1,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 1",y="Average Sold Count by Weekdays", x="Event Date"))
Finally, when sold counts of each weekday is plotted for each year and month there is not a clear pattern as seen in the plot below.
# Collect (Day, Month, year) means for the faceted plot. Note list() keeps one
# row per original observation, so groups appear duplicated (372 rows, see output).
pr1_wday=product1[,list(wday_mean), by=.(Day, Month, year(event_date))]
pr1_wday
## Day Month year wday_mean
## 1: Pzt May 2020 610.00
## 2: Sal May 2020 437.00
## 3: Çar May 2020 270.00
## 4: Per May 2020 366.00
## 5: Cum May 2020 1188.00
## ---
## 368: Per May 2021 417.25
## 369: Cum May 2021 388.00
## 370: Cum May 2021 388.00
## 371: Cum May 2021 388.00
## 372: Cum May 2021 388.00
# Weekday means per month, faceted by year. Facet on the data column via
# vars() instead of passing an external vector (documented facet_grid API).
(ggplot(pr1_wday,aes(x=Day,y=wday_mean))+
geom_line(aes(group=factor(Month), color=Month))+
facet_grid(rows = vars(year))+
theme_minimal()+
labs(title="Product 1",y="Average Sold Count by Weekdays", x="Event Date"))
Time series of Product 1 is decomposed weekly and additively. There is some seasonality in the Trend term, and the Random term fails to satisfy the constant variance assumption for a period. The test statistic of the KPSS test is smaller than the critical values, which indicates stationarity.
# Weekly (freq = 7) additive decomposition of Product 1 sold counts,
# followed by a KPSS stationarity test on the remainder. use.lag given as
# the string "8" is coerced to an integer by ur.kpss (output: "8 lags").
weekly_pr1 = ts(product1$sold_count, freq=7)
weekly_decomp_pr1_add = decompose(weekly_pr1)
plot(weekly_decomp_pr1_add)
test_stat <- ur.kpss(weekly_decomp_pr1_add$random, use.lag = "8")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.0149
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
In multiplicative decomposition there is still a seasonality in Trend term but random term has smaller variance than the additive decomposition. KPSS test statistic is greater than the additive decomposition.
# Weekly multiplicative decomposition and KPSS test on its remainder.
weekly_decomp_pr1_mult = decompose(weekly_pr1, type="multiplicative")
plot(weekly_decomp_pr1_mult)
test_stat <- ur.kpss(weekly_decomp_pr1_mult$random, use.lag = "8")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.1564
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
The first plot shows the average of products sold for each day of month.
# Average sold count per day of month (1-31), pooled over both years.
product1[,day_mean := mean(sold_count), by=.(day(event_date))]
(ggplot(product1,aes(x=event_date,y=day_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 1",y="Average Sold Count by Days of Month", x="Event Date"))
Second plot shows the difference of average sold counts for each day of month in 2020 and 2021.
# Same statistic split by year; overwrites day_mean by reference.
product1[,day_mean := mean(sold_count), by=.(year(event_date),day(event_date))]
(ggplot(product1,aes(x=event_date,y=day_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 1",y="Average Sold Count by Days of Month", x="Event Date"))
Finally, when sold counts for each day of the month are plotted for 2020 and 2021, a pattern is observed: on the 10th day sales are high, and around the 15th day sales drop.
# One line per year across days of month; the by=.() columns are auto-named
# "day" and "year", which is what aes() below refers to.
pr1_day=product1[,list(day_mean), by=.(day(event_date), year(event_date))]
(ggplot(pr1_day,aes(x=day,y=day_mean))+
geom_line(aes(group=factor(year), color=factor(year)))+
theme_minimal()+
labs(title="Product 1",y="Average Sold Count by Days of Month", x="Event Date"))
Additive monthly decomposition does not have a seasonality in Trend term.
KPSS test has small test statistic
# Monthly (freq = 30) additive decomposition and KPSS test on the remainder.
monthly_pr1 = ts(product1$sold_count, freq=30)
monthly_decomp_pr1 = decompose(monthly_pr1)
plot(monthly_decomp_pr1)
test_stat <- ur.kpss(monthly_decomp_pr1$random, use.lag = "12")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 12 lags.
##
## Value of test-statistic is: 0.0177
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
# Monthly multiplicative decomposition and KPSS test on its remainder.
monthly_decomp_pr1_mult = decompose(monthly_pr1, type="multiplicative")
plot(monthly_decomp_pr1_mult)
test_stat <- ur.kpss(monthly_decomp_pr1_mult$random, use.lag = "12")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 12 lags.
##
## Value of test-statistic is: 0.0551
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
The smallest test statistic belongs to the weekly additive decomposition for Product 1.
ID of product 2 is “32737302” and it is a bikini top by “TRENDYOLMILLA”.
# Raw daily sold counts for Product 2.
(ggplot(product2,aes(x=event_date,y=sold_count))+
geom_line()+
theme_minimal()+
labs(title="Product 2",y="Sold Count", x="Event Date"))
The first plot shows the average of products sold for each weekday.
# Pooled average sold count per weekday for Product 2.
product2[,wday_mean := mean(sold_count), by=.(Day)]
(ggplot(product2,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 2",y="Average Sold Count by Weekdays", x="Event Date"))
Second plot shows the difference of average sold counts for each weekday in 2020 and 2021. The change in mean is due to 2020 winter period where almost zero products are sold.
# Weekday means within each year (2020 vs 2021); overwrites wday_mean.
product2[,wday_mean := mean(sold_count), by=.(year(event_date),Day)]
(ggplot(product2,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 2",y="Average Sold Count by Weekdays", x="Event Date"))
Third plot shows the average sold counts for each weekday for each month. It is possible to say that each weekday has more or less similar effects for each month and year.
# Weekday means within each (year, month); overwrites wday_mean.
product2[,wday_mean := mean(sold_count), by=.(year(event_date),Day,Month)]
(ggplot(product2,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 2",y="Average Sold Count by Weekdays", x="Event Date"))
Finally, when sold counts of each weekday are plotted for each year and month, there is a pattern observed in summer months.
# Weekday means per month, faceted by year. Facet on the data column via
# vars() instead of passing an external vector (documented facet_grid API).
pr2_wday=product2[,list(wday_mean), by=.(Day, Month, year(event_date))]
(ggplot(pr2_wday,aes(x=Day,y=wday_mean))+
geom_line(aes(group=factor(Month), color=Month))+
facet_grid(rows = vars(year))+
theme_minimal()+
labs(title="Product 2",y="Average Sold Count by Weekdays", x="Event Date"))
Time series of Product 2 is decomposed weekly and additively. There is some seasonality in the Trend term, and the Random term fails to satisfy the constant variance assumption.
# Weekly additive decomposition of Product 2 and KPSS test on the remainder.
weekly_pr2 = ts(product2$sold_count, freq=7)
weekly_decomp_pr2_add = decompose(weekly_pr2)
plot(weekly_decomp_pr2_add)
test_stat <- ur.kpss(weekly_decomp_pr2_add$random, use.lag = "8")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.0319
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
In multiplicative decomposition there is still a seasonality in Trend term but random term has smaller variance than the additive decomposition.
# Weekly multiplicative decomposition and KPSS test on its remainder.
weekly_decomp_pr2_mult = decompose(weekly_pr2, type="multiplicative")
plot(weekly_decomp_pr2_mult)
test_stat <- ur.kpss(weekly_decomp_pr2_mult$random, use.lag = "8")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.1042
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
The first plot shows the average of products sold for each day of month.
# Average sold count per day of month for Product 2, pooled over both years.
product2[,day_mean := mean(sold_count), by=.(day(event_date))]
(ggplot(product2,aes(x=event_date,y=day_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 2",y="Average Sold Count by Days of Month", x="Event Date"))
Second plot shows the difference of average sold counts for each day of month in 2020 and 2021. Patterns are similar in both years.
# Same statistic split by year; overwrites day_mean by reference.
product2[,day_mean := mean(sold_count), by=.(year(event_date),day(event_date))]
(ggplot(product2,aes(x=event_date,y=day_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 2",y="Average Sold Count by Days of Month", x="Event Date"))
Finally, when sold counts of each day of month is plotted for each 2020 and 2021 the patterns are not similar. This observation may be due to the winter period of 2020.
# One line per year across days of month (by-columns auto-named "day", "year").
pr2_day=product2[,list(day_mean), by=.(day(event_date), year(event_date))]
(ggplot(pr2_day,aes(x=day,y=day_mean))+
geom_line(aes(group=factor(year), color=factor(year)))+
theme_minimal()+
labs(title="Product 2",y="Average Sold Count by Days of Month", x="Event Date"))
Constant variance assumption of random term is violated.
# Monthly additive decomposition of Product 2 and KPSS test on its remainder.
monthly_pr2 = ts(product2$sold_count, freq=30)
monthly_decomp_pr2 = decompose(monthly_pr2)
plot(monthly_decomp_pr2)
# BUG FIX: the KPSS test was run on weekly_decomp_pr2_mult$random (copy-paste),
# which is why the reported statistic duplicated the weekly result; test the
# monthly additive remainder instead.
test_stat <- ur.kpss(monthly_decomp_pr2$random, use.lag = "12")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 12 lags.
##
## Value of test-statistic is: 0.1277
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
# Monthly multiplicative decomposition of Product 2 and KPSS test.
monthly_decomp_pr2_mult = decompose(monthly_pr2, type="multiplicative")
plot(monthly_decomp_pr2_mult)
# BUG FIX: the KPSS test was run on weekly_decomp_pr2_mult$random (copy-paste);
# test the monthly multiplicative remainder instead.
test_stat <- ur.kpss(monthly_decomp_pr2_mult$random, use.lag = "12")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 12 lags.
##
## Value of test-statistic is: 0.1277
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
Additive weekly decomposition is chosen for Product 2 since its test statistic is the smallest.
ID of product 3 is “32939029” and it is a chargable tooth brush by “Oral-B”.
# Raw daily sold counts for Product 3.
(ggplot(product3,aes(x=event_date,y=sold_count))+
geom_line()+
theme_minimal()+
labs(title="Product 3",y="Sold Count", x="Event Date"))
The first plot shows the average of products sold for each weekday.
# Pooled average sold count per weekday for Product 3.
product3[,wday_mean := mean(sold_count), by=.(Day)]
(ggplot(product3,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 3",y="Average Sold Count by Weekdays", x="Event Date"))
Second plot shows the difference of average sold counts for each weekday in 2020 and 2021.
# Weekday means within each year; overwrites wday_mean by reference.
product3[,wday_mean := mean(sold_count), by=.(year(event_date),Day)]
(ggplot(product3,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 3",y="Average Sold Count by Weekdays", x="Event Date"))
Third plot shows the average sold counts for each weekday for each month. It is possible to say that each weekday has more or less similar effects for each month and year.
# Weekday means within each (year, month); overwrites wday_mean.
product3[,wday_mean := mean(sold_count), by=.(year(event_date),Day,Month)]
(ggplot(product3,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 3",y="Average Sold Count by Weekdays", x="Event Date"))
Finally, when sold counts of each weekday is plotted for each year and month a pattern is observed.
# Weekday means per month, faceted by year. Facet on the data column via
# vars() instead of passing an external vector (documented facet_grid API).
pr3_wday=product3[,list(wday_mean), by=.(Day, Month, year(event_date))]
(ggplot(pr3_wday,aes(x=Day,y=wday_mean))+
geom_line(aes(group=factor(Month), color=Month))+
facet_grid(rows = vars(year))+
theme_minimal()+
labs(title="Product 3",y="Average Sold Count by Weekdays", x="Event Date"))
Time series of Product 3 is decomposed weekly and additively. There is some seasonality in the Trend term, and the Random term fails to satisfy the constant variance assumption.
# Weekly additive decomposition of Product 3 and KPSS test on the remainder.
weekly_pr3 = ts(product3$sold_count, freq=7)
weekly_decomp_pr3_add = decompose(weekly_pr3)
plot(weekly_decomp_pr3_add)
test_stat <- ur.kpss(weekly_decomp_pr3_add$random, use.lag = "8")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.024
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
In multiplicative decomposition there is still a seasonality in Trend term but random term has smaller variance than the additive decomposition.
# Weekly multiplicative decomposition and KPSS test on its remainder.
weekly_decomp_pr3_mult = decompose(weekly_pr3, type="multiplicative")
plot(weekly_decomp_pr3_mult)
test_stat <- ur.kpss(weekly_decomp_pr3_mult$random, use.lag = "8")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.3276
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
The first plot shows the average of products sold for each day of month.
# Average sold count per day of month for Product 3, pooled over both years.
product3[,day_mean := mean(sold_count), by=.(day(event_date))]
(ggplot(product3,aes(x=event_date,y=day_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 3",y="Average Sold Count by Days of Month", x="Event Date"))
Second plot shows the difference of average sold counts for each day of month in 2020 and 2021.
# Same statistic split by year; overwrites day_mean by reference.
product3[,day_mean := mean(sold_count), by=.(year(event_date),day(event_date))]
(ggplot(product3,aes(x=event_date,y=day_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 3",y="Average Sold Count by Days of Month", x="Event Date"))
Finally, when sold counts of each day of month is plotted for each 2020 and 2021 a pattern is observed in the plot below.
# One line per year across days of month (by-columns auto-named "day", "year").
pr3_day=product3[,list(day_mean), by=.(day(event_date), year(event_date))]
(ggplot(pr3_day,aes(x=day,y=day_mean))+
geom_line(aes(group=factor(year), color=factor(year)))+
theme_minimal()+
labs(title="Product 3",y="Average Sold Count by Days of Month", x="Event Date"))
In additive decomposition constant variance assumption of random term is violated.
# Monthly additive decomposition of Product 3 and KPSS test on the remainder.
monthly_pr3 = ts(product3$sold_count, freq=30)
monthly_decomp_pr3 = decompose(monthly_pr3)
plot(monthly_decomp_pr3)
test_stat <- ur.kpss(monthly_decomp_pr3$random, use.lag = "12")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 12 lags.
##
## Value of test-statistic is: 0.0687
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
# Monthly multiplicative decomposition and KPSS test on its remainder.
monthly_decomp_pr3_mult = decompose(monthly_pr3, type="multiplicative")
plot(monthly_decomp_pr3_mult)
test_stat <- ur.kpss(monthly_decomp_pr3_mult$random, use.lag = "12")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 12 lags.
##
## Value of test-statistic is: 0.1196
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
Additive weekly decomposition is chosen for Product 3 since its test statistic is the smallest.
ID of product 4 is “4066298” and it is a baby wet wipes by “Sleepy”.
# Raw daily sold counts for Product 4.
(ggplot(product4,aes(x=event_date,y=sold_count))+
geom_line()+
theme_minimal()+
labs(title="Product 4",y="Sold Count", x="Event Date"))
The first plot shows the average of products sold for each weekday.
# Pooled average sold count per weekday for Product 4.
product4[,wday_mean := mean(sold_count), by=.(Day)]
(ggplot(product4,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 4",y="Average Sold Count by Weekdays", x="Event Date"))
Second plot shows the difference of average sold counts for each weekday in 2020 and 2021.
# Weekday means within each year; overwrites wday_mean by reference.
product4[,wday_mean := mean(sold_count), by=.(year(event_date),Day)]
(ggplot(product4,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 4",y="Average Sold Count by Weekdays", x="Event Date"))
Third plot shows the average sold counts for each weekday for each month. It is possible to say that each weekday has more or less similar effects for each month and year.
# Weekday means within each (year, month); overwrites wday_mean.
product4[,wday_mean := mean(sold_count), by=.(year(event_date),Day,Month)]
(ggplot(product4,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 4",y="Average Sold Count by Weekdays", x="Event Date"))
Finally, when sold counts of each weekday is plotted for each year and month a pattern is observed mostly in winter months.
# Weekday means per month, faceted by year. Facet on the data column via
# vars() instead of passing an external vector (documented facet_grid API).
pr4_wday=product4[,list(wday_mean), by=.(Day, Month, year(event_date))]
(ggplot(pr4_wday,aes(x=Day,y=wday_mean))+
geom_line(aes(group=factor(Month), color=Month))+
facet_grid(rows = vars(year))+
theme_minimal()+
labs(title="Product 4",y="Average Sold Count by Weekdays", x="Event Date"))
Time series of Product 4 is decomposed weekly and additively. There is some seasonality in the Trend term, and the Random term fails to satisfy the constant variance assumption.
# Weekly additive decomposition of Product 4 and KPSS test on the remainder.
weekly_pr4 = ts(product4$sold_count, freq=7)
weekly_decomp_pr4_add = decompose(weekly_pr4)
plot(weekly_decomp_pr4_add)
test_stat <- ur.kpss(weekly_decomp_pr4_add$random, use.lag = "8")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.014
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
In multiplicative decomposition there is still a seasonality in Trend term but random term has more constant variance than the additive decomposition.
# Weekly multiplicative decomposition and KPSS test on its remainder.
weekly_decomp_pr4_mult = decompose(weekly_pr4, type="multiplicative")
plot(weekly_decomp_pr4_mult)
test_stat <- ur.kpss(weekly_decomp_pr4_mult$random, use.lag = "8")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.1415
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
The first plot shows the average of products sold for each day of month.
# Average sold count per day of month for Product 4, pooled over both years.
product4[,day_mean := mean(sold_count), by=.(day(event_date))]
(ggplot(product4,aes(x=event_date,y=day_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 4",y="Average Sold Count by Days of Month", x="Event Date"))
Second plot shows the difference of average sold counts for each day of month in 2020 and 2021.
# Same statistic split by year; overwrites day_mean by reference.
product4[,day_mean := mean(sold_count), by=.(year(event_date),day(event_date))]
(ggplot(product4,aes(x=event_date,y=day_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 4",y="Average Sold Count by Days of Month", x="Event Date"))
Finally, sold counts for each day of the month are plotted for 2020 and 2021.
# One line per year across days of month (by-columns auto-named "day", "year").
pr4_day=product4[,list(day_mean), by=.(day(event_date), year(event_date))]
(ggplot(pr4_day,aes(x=day,y=day_mean))+
geom_line(aes(group=factor(year), color=factor(year)))+
theme_minimal()+
labs(title="Product 4",y="Average Sold Count by Days of Month", x="Event Date"))
Monthly decompositions of sold count of Product 4 are given below.
Seasonality in trend is removed.
# Monthly additive decomposition of Product 4 and KPSS test on the remainder.
monthly_pr4 = ts(product4$sold_count, freq=30)
monthly_decomp_pr4 = decompose(monthly_pr4)
plot(monthly_decomp_pr4)
test_stat <- ur.kpss(monthly_decomp_pr4$random, use.lag = "12")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 12 lags.
##
## Value of test-statistic is: 0.0299
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
# Monthly multiplicative decomposition and KPSS test on its remainder.
monthly_decomp_pr4_mult = decompose(monthly_pr4, type="multiplicative")
plot(monthly_decomp_pr4_mult)
test_stat <- ur.kpss(monthly_decomp_pr4_mult$random, use.lag = "12")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 12 lags.
##
## Value of test-statistic is: 0.0243
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
Additive weekly decomposition is chosen for Product 4 since its test statistic is the smallest and its random term is the most similar to a white noise series.
ID of product 5 is “48740784” and it is a coat by “Altınyıldız Classics”.
# Raw daily sold counts for Product 5.
(ggplot(product5,aes(x=event_date,y=sold_count))+
geom_line()+
theme_minimal()+
labs(title="Product 5",y="Sold Count", x="Event Date"))
The first plot shows the average of products sold for each weekday.
# Pooled average sold count per weekday for Product 5.
product5[,wday_mean := mean(sold_count), by=.(Day)]
(ggplot(product5,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 5",y="Average Sold Count by Weekdays", x="Event Date"))
Second plot shows the difference of average sold counts for each weekday in 2020 and 2021. Coat sales mostly occur in winter months. Since winter of 2021 is not observed yet mean of the sold count is smaller for 2021.
# Weekday means within each year; overwrites wday_mean by reference.
product5[,wday_mean := mean(sold_count), by=.(year(event_date),Day)]
(ggplot(product5,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 5",y="Average Sold Count by Weekdays", x="Event Date"))
Third plot shows the average sold counts for each weekday for each month. It is possible to say that each weekday has more or less similar effects for each month and year.
# Weekday means within each (year, month); overwrites wday_mean.
product5[,wday_mean := mean(sold_count), by=.(year(event_date),Day,Month)]
(ggplot(product5,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 5",y="Average Sold Count by Weekdays", x="Event Date"))
Time series of Product 5 is decomposed weekly and additively. The Trend term indicates the winter period where coat sales are higher, and the random term has constant variance except for an outlier period.
# Weekly additive decomposition of Product 5 and KPSS test on the remainder.
weekly_pr5 = ts(product5$sold_count, freq=7)
weekly_decomp_pr5_add = decompose(weekly_pr5)
plot(weekly_decomp_pr5_add)
test_stat <- ur.kpss(weekly_decomp_pr5_add$random, use.lag = "8")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.0124
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
In multiplicative decomposition there is still a seasonality in Trend term and random term has less constant variance than the additive decomposition.
# Weekly multiplicative decomposition and KPSS test on its remainder.
weekly_decomp_pr5_mult = decompose(weekly_pr5, type="multiplicative")
plot(weekly_decomp_pr5_mult)
test_stat <- ur.kpss(weekly_decomp_pr5_mult$random, use.lag = "8")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.0981
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
The first plot shows the average of products sold for each day of month.
# Average sold count per day of month for Product 5, pooled over both years.
product5[,day_mean := mean(sold_count), by=.(day(event_date))]
(ggplot(product5,aes(x=event_date,y=day_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 5",y="Average Sold Count by Days of Month", x="Event Date"))
Second plot shows the difference of average sold counts for each day of month in 2020 and 2021.
# Same statistic split by year; overwrites day_mean by reference.
product5[,day_mean := mean(sold_count), by=.(year(event_date),day(event_date))]
(ggplot(product5,aes(x=event_date,y=day_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 5",y="Average Sold Count by Days of Month", x="Event Date"))
Monthly decompositions of sold count of Product 5 are given below. Trend term show the high coat sale period. Effect of seasonality can be observed.
# Monthly additive decomposition of Product 5 and KPSS test on the remainder.
monthly_pr5 = ts(product5$sold_count, freq=30)
monthly_decomp_pr5 = decompose(monthly_pr5)
plot(monthly_decomp_pr5)
test_stat <- ur.kpss(monthly_decomp_pr5$random, use.lag = "12")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 12 lags.
##
## Value of test-statistic is: 0.0195
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
# Monthly multiplicative decomposition and KPSS test on its remainder.
monthly_decomp_pr5_mult = decompose(monthly_pr5, type="multiplicative")
plot(monthly_decomp_pr5_mult)
test_stat <- ur.kpss(monthly_decomp_pr5_mult$random, use.lag = "12")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 12 lags.
##
## Value of test-statistic is: 0.1544
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
Additive weekly decomposition is chosen for Product 5 since its test statistic is the smallest and its random term is the most similar to a white noise series.
ID of product 6 is “6676673” and it is bluetooth earphones by “Xiaomi”.
# Raw daily sold counts for Product 6.
(ggplot(product6,aes(x=event_date,y=sold_count))+
geom_line()+
theme_minimal()+
labs(title="Product 6",y="Sold Count", x="Event Date"))
The first plot shows the average of products sold for each weekday.
# Pooled average sold count per weekday for Product 6.
product6[,wday_mean := mean(sold_count), by=.(Day)]
(ggplot(product6,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 6",y="Average Sold Count by Weekdays", x="Event Date"))
Second plot shows the difference of average sold counts for each weekday in 2020 and 2021.
# Weekday means within each year; overwrites wday_mean by reference.
product6[,wday_mean := mean(sold_count), by=.(year(event_date),Day)]
(ggplot(product6,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 6",y="Average Sold Count by Weekdays", x="Event Date"))
Third plot shows the average sold counts for each weekday for each month. It is possible to say that each weekday has more or less similar effects for each month and year.
# Weekday means within each (year, month); overwrites wday_mean.
product6[,wday_mean := mean(sold_count), by=.(year(event_date),Day,Month)]
(ggplot(product6,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 6",y="Average Sold Count by Weekdays", x="Event Date"))
Finally, when sold counts of each weekday is plotted for each year and month a pattern is observed for some of the months.
# Weekday means per month, faceted by year. Facet on the data column via
# vars() instead of passing an external vector (documented facet_grid API).
pr6_wday=product6[,list(wday_mean), by=.(Day, Month, year(event_date))]
(ggplot(pr6_wday,aes(x=Day,y=wday_mean))+
geom_line(aes(group=factor(Month), color=Month))+
facet_grid(rows = vars(year))+
theme_minimal()+
labs(title="Product 6",y="Average Sold Count by Weekdays", x="Event Date"))
Time series of Product 6 is decomposed weekly and additively. Seasonality can be observed. The random term has constant mean and more or less constant variance.
# Weekly additive decomposition of Product 6 and KPSS test on the remainder.
weekly_pr6 = ts(product6$sold_count, freq=7)
weekly_decomp_pr6_add = decompose(weekly_pr6)
plot(weekly_decomp_pr6_add)
test_stat <- ur.kpss(weekly_decomp_pr6_add$random, use.lag = "8")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.0135
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
# Weekly multiplicative decomposition and KPSS test on its remainder.
weekly_decomp_pr6_mult = decompose(weekly_pr6, type="multiplicative")
plot(weekly_decomp_pr6_mult)
test_stat <- ur.kpss(weekly_decomp_pr6_mult$random, use.lag = "8")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.0289
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
The first plot shows the average of products sold for each day of month.
# Average sold count per day of month for Product 6, pooled over both years.
product6[,day_mean := mean(sold_count), by=.(day(event_date))]
(ggplot(product6,aes(x=event_date,y=day_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 6",y="Average Sold Count by Days of Month", x="Event Date"))
Second plot shows the difference of average sold counts for each day of month in 2020 and 2021.
# Same statistic split by year; overwrites day_mean by reference.
product6[,day_mean := mean(sold_count), by=.(year(event_date),day(event_date))]
(ggplot(product6,aes(x=event_date,y=day_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 6",y="Average Sold Count by Days of Month", x="Event Date"))
Finally, sold counts for each day of the month are plotted for 2020 and 2021; a clear pattern can be observed in the plot below.
# One line per year across days of month (by-columns auto-named "day", "year").
pr6_day=product6[,list(day_mean), by=.(day(event_date), year(event_date))]
(ggplot(pr6_day,aes(x=day,y=day_mean))+
geom_line(aes(group=factor(year), color=factor(year)))+
theme_minimal()+
labs(title="Product 6",y="Average Sold Count by Days of Month", x="Event Date"))
Monthly decompositions of sold count of Product 6 are given below. Trend term does not include seasonality. Random term satisfies assumptions.
monthly_pr6 = ts(product6$sold_count, freq=30)
monthly_decomp_pr6 = decompose(monthly_pr6)
plot(monthly_decomp_pr6)
test_stat <- ur.kpss(monthly_decomp_pr6$random, use.lag = "12")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 12 lags.
##
## Value of test-statistic is: 0.0333
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
monthly_decomp_pr6_mult = decompose(monthly_pr6, type="multiplicative")
plot(monthly_decomp_pr6_mult)
test_stat <- ur.kpss(monthly_decomp_pr6_mult$random, use.lag = "12")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 12 lags.
##
## Value of test-statistic is: 0.0269
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
Additive weekly decomposition is chosen for Product 6 since its test statistics is the smallest and random term is the most similar to a white noise series.
ID of product 7 is “7061886” and it is bluetooth earphones by “Xiaomi”.
# Raw daily sold_count of Product 7.
(ggplot(product7,aes(x=event_date,y=sold_count))+
geom_line()+
theme_minimal()+
labs(title="Product 7",y="Sold Count", x="Event Date"))
The first plot shows the average of products sold for each weekday.
# Mean sold_count per weekday, assigned to every row of that weekday.
product7[,wday_mean := mean(sold_count), by=.(Day)]
(ggplot(product7,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 7",y="Average Sold Count by Weekdays", x="Event Date"))
Second plot shows the difference of average sold counts for each weekday in 2020 and 2021.
# Weekday average split by year.
product7[,wday_mean := mean(sold_count), by=.(year(event_date),Day)]
(ggplot(product7,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 7",y="Average Sold Count by Weekdays", x="Event Date"))
Third plot shows the average sold counts for each weekday for each month.
# Weekday average split by year and month.
product7[,wday_mean := mean(sold_count), by=.(year(event_date),Day,Month)]
(ggplot(product7,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 7",y="Average Sold Count by Weekdays", x="Event Date"))
# One row per (Day, Month, year): wday_mean is constant within each group,
# so unique() drops the redundant repeated rows the grouped list() produces.
pr7_wday=unique(product7[,list(wday_mean), by=.(Day, Month, year(event_date))])
# Weekday profile per month, one facet row per year.
# facet_grid(rows = vars(...)) is the supported ggplot2 interface; passing an
# external vector (pr7_wday$year) is not a documented faceting specification.
# The x axis shows weekdays, so it is labelled accordingly.
(ggplot(pr7_wday,aes(x=Day,y=wday_mean))+
geom_line(aes(group=factor(Month), color=Month))+
facet_grid(rows = vars(year))+
theme_minimal()+
labs(title="Product 7",y="Average Sold Count by Weekdays", x="Day of Week"))
The time series of Product 7 is decomposed weekly and additively. Seasonality can be observed. The random term has constant mean and more or less constant variance, except in some periods that could be outliers.
# Weekly (freq = 7) additive decomposition of Product 7 sold_count.
weekly_pr7 = ts(product7$sold_count, freq=7)
weekly_decomp_pr7_add = decompose(weekly_pr7)
plot(weekly_decomp_pr7_add)
# KPSS on the remainder; statistic far below critical values.
test_stat <- ur.kpss(weekly_decomp_pr7_add$random, use.lag = "8")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.0136
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
For multiplicative series variance of random term is more close to constant.
# Multiplicative weekly decomposition, for comparison.
weekly_decomp_pr7_mult = decompose(weekly_pr7, type="multiplicative")
plot(weekly_decomp_pr7_mult)
test_stat <- ur.kpss(weekly_decomp_pr7_mult$random, use.lag = "8")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.1076
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
The first plot shows the average of products sold for each day of month.
# Mean sold_count per day-of-month.
product7[,day_mean := mean(sold_count), by=.(day(event_date))]
(ggplot(product7,aes(x=event_date,y=day_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 7",y="Average Sold Count by Days of Month", x="Event Date"))
Second plot shows the difference of average sold counts for each day of month in 2020 and 2021.
# Same average, grouped by (year, day-of-month).
product7[,day_mean := mean(sold_count), by=.(year(event_date),day(event_date))]
(ggplot(product7,aes(x=event_date,y=day_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 7",y="Average Sold Count by Days of Month", x="Event Date"))
Finally, the sold counts for each day of the month are plotted separately for 2020 and 2021.
# One row per observation per (day-of-month, year) group; day_mean repeats
# within each group — TODO(review): consider unique().
pr7_day=product7[,list(day_mean), by=.(day(event_date), year(event_date))]
(ggplot(pr7_day,aes(x=day,y=day_mean))+
geom_line(aes(group=factor(year), color=factor(year)))+
theme_minimal()+
labs(title="Product 7",y="Average Sold Count by Days of Month", x="Event Date"))
Monthly decompositions of sold count of Product 7 are given below. Trend term does not include seasonality. Random term satisfies assumptions.
# "Monthly" (freq = 30) additive decomposition + KPSS on the remainder.
monthly_pr7 = ts(product7$sold_count, freq=30)
monthly_decomp_pr7 = decompose(monthly_pr7)
plot(monthly_decomp_pr7)
test_stat <- ur.kpss(monthly_decomp_pr7$random, use.lag = "12")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 12 lags.
##
## Value of test-statistic is: 0.0196
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
# Multiplicative monthly decomposition, for comparison.
monthly_decomp_pr7_mult = decompose(monthly_pr7, type="multiplicative")
plot(monthly_decomp_pr7_mult)
test_stat <- ur.kpss(monthly_decomp_pr7_mult$random, use.lag = "12")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 12 lags.
##
## Value of test-statistic is: 0.023
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
Additive weekly decomposition is chosen for Product 7 since its test statistics is the smallest and random term is the most similar to a white noise series.
ID of product 8 is “73318567” and it is a bikini top by “TRENDYOL MILLA”.
# Raw daily sold_count of Product 8.
(ggplot(product8,aes(x=event_date,y=sold_count))+
geom_line()+
theme_minimal()+
labs(title="Product 8",y="Sold Count", x="Event Date"))
The first plot shows the average of products sold for each weekday.
# Mean sold_count per weekday.
product8[,wday_mean := mean(sold_count), by=.(Day)]
(ggplot(product8,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 8",y="Average Sold Count by Weekdays", x="Event Date"))
Second plot shows the difference of average sold counts for each weekday in 2020 and 2021. There is not any sales record in 2020.
# Weekday average split by year.
product8[,wday_mean := mean(sold_count), by=.(year(event_date),Day)]
(ggplot(product8,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 8",y="Average Sold Count by Weekdays", x="Event Date"))
Third plot shows the average sold counts for each weekday for each month.
# Weekday average split by year and month.
product8[,wday_mean := mean(sold_count), by=.(year(event_date),Day,Month)]
(ggplot(product8,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 8",y="Average Sold Count by Weekdays", x="Event Date"))
The time series of Product 8 is decomposed weekly and additively. Seasonality can be observed. The trend term identifies the periods where the sold count is non-zero.
# Weekly (freq = 7) additive decomposition of Product 8 sold_count.
weekly_pr8 = ts(product8$sold_count, freq=7)
weekly_decomp_pr8_add = decompose(weekly_pr8)
plot(weekly_decomp_pr8_add)
# KPSS on the remainder.
test_stat <- ur.kpss(weekly_decomp_pr8_add$random, use.lag = "8")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.0484
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
# Multiplicative weekly decomposition, for comparison.
weekly_decomp_pr8_mult = decompose(weekly_pr8, type="multiplicative")
plot(weekly_decomp_pr8_mult)
test_stat <- ur.kpss(weekly_decomp_pr8_mult$random, use.lag = "8")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.0706
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
The first plot shows the average of products sold for each day of month.
# Mean sold_count per day-of-month.
product8[,day_mean := mean(sold_count), by=.(day(event_date))]
(ggplot(product8,aes(x=event_date,y=day_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 8",y="Average Sold Count by Days of Month", x="Event Date"))
Second plot shows the difference of average sold counts for each day of month in 2020 and 2021.
# Same average, grouped by (year, day-of-month).
product8[,day_mean := mean(sold_count), by=.(year(event_date),day(event_date))]
(ggplot(product8,aes(x=event_date,y=day_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 8",y="Average Sold Count by Days of Month", x="Event Date"))
Finally, the sold counts for each day of the month are plotted separately for 2020 and 2021.
# One row per observation per (day-of-month, year) group; day_mean repeats
# within each group — TODO(review): consider unique().
pr8_day=product8[,list(day_mean), by=.(day(event_date), year(event_date))]
(ggplot(pr8_day,aes(x=day,y=day_mean))+
geom_line(aes(group=factor(year), color=factor(year)))+
theme_minimal()+
labs(title="Product 8",y="Average Sold Count by Days of Month", x="Event Date"))
Monthly decompositions of sold count of Product 8 are given below.
# "Monthly" (freq = 30) additive decomposition + KPSS on the remainder.
monthly_pr8 = ts(product8$sold_count, freq=30)
monthly_decomp_pr8 = decompose(monthly_pr8)
plot(monthly_decomp_pr8)
test_stat <- ur.kpss(monthly_decomp_pr8$random, use.lag = "12")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 12 lags.
##
## Value of test-statistic is: 0.0313
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
# Multiplicative monthly decomposition, for comparison.
monthly_decomp_pr8_mult = decompose(monthly_pr8, type="multiplicative")
plot(monthly_decomp_pr8_mult)
test_stat <- ur.kpss(monthly_decomp_pr8_mult$random, use.lag = "12")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 12 lags.
##
## Value of test-statistic is: 0.1496
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
Additive monthly decomposition is chosen for Product 8 since its test statistics is the smallest and random term is the most similar to a white noise series.
ID of product 9 is “85004” and it is face cleaner by “La Roche Posay”.
# Raw daily sold_count of Product 9.
(ggplot(product9,aes(x=event_date,y=sold_count))+
geom_line()+
theme_minimal()+
labs(title="Product 9",y="Sold Count", x="Event Date"))
The first plot shows the average of products sold for each weekday.
# Mean sold_count per weekday.
product9[,wday_mean := mean(sold_count), by=.(Day)]
(ggplot(product9,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 9",y="Average Sold Count by Weekdays", x="Event Date"))
Second plot shows the difference of average sold counts for each weekday in 2020 and 2021.
# Weekday average split by year.
product9[,wday_mean := mean(sold_count), by=.(year(event_date),Day)]
(ggplot(product9,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 9",y="Average Sold Count by Weekdays", x="Event Date"))
Third plot shows the average sold counts for each weekday for each month. Effect of each day of week is similar for each month.
# Weekday average split by year and month.
product9[,wday_mean := mean(sold_count), by=.(year(event_date),Day,Month)]
(ggplot(product9,aes(x=event_date,y=wday_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 9",y="Average Sold Count by Weekdays", x="Event Date"))
Finally, when sold counts of each weekday is plotted for each year and month a pattern is observed.
# One row per (Day, Month, year): wday_mean is constant within each group,
# so unique() drops the redundant repeated rows the grouped list() produces.
pr9_wday=unique(product9[,list(wday_mean), by=.(Day, Month, year(event_date))])
# Weekday profile per month, one facet row per year.
# facet_grid(rows = vars(...)) is the supported ggplot2 interface; passing an
# external vector (pr9_wday$year) is not a documented faceting specification.
# The x axis shows weekdays, so it is labelled accordingly.
(ggplot(pr9_wday,aes(x=Day,y=wday_mean))+
geom_line(aes(group=factor(Month), color=Month))+
facet_grid(rows = vars(year))+
theme_minimal()+
labs(title="Product 9",y="Average Sold Count by Weekdays", x="Day of Week"))
The time series of Product 9 is decomposed weekly and additively. Seasonality can be observed, and the trend term shows some seasonality as well. The random term has constant mean and more or less constant variance, except in some periods that could be outliers.
# Weekly (freq = 7) additive decomposition of Product 9 sold_count.
weekly_pr9 = ts(product9$sold_count, freq=7)
weekly_decomp_pr9_add = decompose(weekly_pr9)
plot(weekly_decomp_pr9_add)
# KPSS on the remainder.
test_stat <- ur.kpss(weekly_decomp_pr9_add$random, use.lag = "8")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.0124
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
For multiplicative series variance of random term is more close to constant.
# Multiplicative weekly decomposition, for comparison.
weekly_decomp_pr9_mult = decompose(weekly_pr9, type="multiplicative")
plot(weekly_decomp_pr9_mult)
test_stat <- ur.kpss(weekly_decomp_pr9_mult$random, use.lag = "8")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.1574
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
The first plot shows the average of products sold for each day of month.
# Mean sold_count per day-of-month.
product9[,day_mean := mean(sold_count), by=.(day(event_date))]
(ggplot(product9,aes(x=event_date,y=day_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 9",y="Average Sold Count by Days of Month", x="Event Date"))
Second plot shows the difference of average sold counts for each day of month in 2020 and 2021.
# Same average, grouped by (year, day-of-month).
product9[,day_mean := mean(sold_count), by=.(year(event_date),day(event_date))]
(ggplot(product9,aes(x=event_date,y=day_mean))+
geom_line()+
theme_minimal()+
labs(title="Product 9",y="Average Sold Count by Days of Month", x="Event Date"))
Finally, the sold counts for each day of the month are plotted separately for 2020 and 2021. A pattern can be observed.
# One row per observation per (day-of-month, year) group; day_mean repeats
# within each group — TODO(review): consider unique().
pr9_day=product9[,list(day_mean), by=.(day(event_date), year(event_date))]
(ggplot(pr9_day,aes(x=day,y=day_mean))+
geom_line(aes(group=factor(year), color=factor(year)))+
theme_minimal()+
labs(title="Product 9",y="Average Sold Count by Days of Month", x="Event Date"))
Monthly decomposition of sold count of Product 9 are given below. Trend term does not include seasonality.
# "Monthly" (freq = 30) additive decomposition + KPSS on the remainder.
monthly_pr9 = ts(product9$sold_count, freq=30)
monthly_decomp_pr9 = decompose(monthly_pr9)
plot(monthly_decomp_pr9)
test_stat <- ur.kpss(monthly_decomp_pr9$random, use.lag = "12")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 12 lags.
##
## Value of test-statistic is: 0.0225
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
# Multiplicative monthly decomposition, for comparison.
monthly_decomp_pr9_mult = decompose(monthly_pr9, type="multiplicative")
plot(monthly_decomp_pr9_mult)
test_stat <- ur.kpss(monthly_decomp_pr9_mult$random, use.lag = "12")
summary(test_stat)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 12 lags.
##
## Value of test-statistic is: 0.024
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
Additive weekly decomposition is chosen for Product 9 since its test statistics is the smallest and random term is the most similar to a white noise series.
Product 1 is decomposed additively on a weekly level. Random term is plotted below.
# Recompute Product 1's weekly additive decomposition; plot only the remainder,
# which is the series the ARIMA model below is fitted to.
weekly_pr1 = ts(product1$sold_count, freq=7)
weekly_decomp_pr1_add = decompose(weekly_pr1)
plot(weekly_decomp_pr1_add$random)
The ACF plot of the random term shows sinusoidal behavior and the PACF plot has some spikes until lag 3 with somewhat of an exponential-decay behavior. ARIMA(p,0,q) models will be used.
# ACF/PACF of the remainder; na.pass keeps the NA edges decompose() leaves.
acf(weekly_decomp_pr1_add$random, na.action = na.pass, lag.max = 50)
pacf(weekly_decomp_pr1_add$random, na.action = na.pass, lag.max = 50)
ARIMA(p,0,0) models with different p values will be tried. After p=8 AIC values starts to increase.
# Grid search over pure AR orders; lower AIC is better.
AIC(arima(weekly_decomp_pr1_add$random, order = c(1,0,0)))
## [1] 5668.436
AIC(arima(weekly_decomp_pr1_add$random, order = c(2,0,0)))
## [1] 5654.891
AIC(arima(weekly_decomp_pr1_add$random, order = c(3,0,0)))
## [1] 5625.933
AIC(arima(weekly_decomp_pr1_add$random, order = c(4,0,0)))
## [1] 5602.433
AIC(arima(weekly_decomp_pr1_add$random, order = c(5,0,0)))
## [1] 5599.56
AIC(arima(weekly_decomp_pr1_add$random, order = c(6,0,0)))
## [1] 5596.376
AIC(arima(weekly_decomp_pr1_add$random, order = c(7,0,0)))
## [1] 5589.928
AIC(arima(weekly_decomp_pr1_add$random, order = c(8,0,0)))
## [1] 5584.987
AIC(arima(weekly_decomp_pr1_add$random, order = c(9,0,0)))
## [1] 5586.433
ARIMA (0,0,q) models with different q values will be tried. After q=9 AIC values starts to increase
# Grid search over pure MA orders.
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,1)))
## [1] 5662.934
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,2)))
## [1] 5662.562
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,3)))
## [1] 5588.176
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,4)))
## [1] 5555.225
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,5)))
## [1] 5552.875
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,6)))
## [1] 5548.857
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,7)))
## [1] 5505.549
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,8)))
## [1] 5498.04
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,9)))
## [1] 5498.274
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,10)))
## [1] 5499.914
AIC(arima(weekly_decomp_pr1_add$random, order = c(0,0,11)))
## [1] 5501.853
Combining two models as ARIMA(8,0,9)
# NOTE(review): AIC 5503.671 is higher than ARIMA(0,0,8)'s 5498.04;
# the combined order is kept by choice, not because it minimizes AIC.
model1 = arima(weekly_decomp_pr1_add$random, order = c(8,0,9))
AIC(model1)
## [1] 5503.671
Fitting the model
# In-sample fit of the remainder, then add back seasonal + trend to get
# fitted values on the original sold_count scale.
pr1_fitted = weekly_decomp_pr1_add$random - residuals(model1)
pr1_fitted_transformed = pr1_fitted+weekly_decomp_pr1_add$seasonal+weekly_decomp_pr1_add$trend
pr1_predictions=cbind(sold_count=weekly_pr1,fitted=pr1_fitted_transformed)
pr1_predictions=as.data.table(pr1_predictions)
pr1_predictions$date=product1$event_date
# The 6 removed rows are the NA edges of the decomposed trend/remainder.
ggplot(pr1_predictions, aes(x=date, y=sold_count))+geom_line()+geom_line(aes(y=fitted),col="red")
## Warning: Removed 6 row(s) containing missing values (geom_path).
# accu() is a project-defined accuracy helper (not shown in this file);
# presumably it reports n, mean, bias and error metrics — verify its source.
accu_1 = accu(pr1_predictions$sold_count, pr1_predictions$fitted)
print(accu_1)
## n mean FBias MAPE RMSE MAD MADP
## 1 372 858.207 0.001146426 0.002160051 0.9919027 0.983871 0.001146426
## WMAPE
## 1 0.001146426
Product 2 is decomposed additively on a weekly level. Random term is plotted below.
# Product 2: weekly additive decomposition; remainder feeds the next search.
weekly_pr2= ts(product2$sold_count, freq=7)
weekly_decomp_pr2_add = decompose(weekly_pr2)
plot(weekly_decomp_pr2_add$random)
The ACF plot of the random term shows sinusoidal behavior and the PACF plot has some spikes until lag 4 with somewhat of an exponential-decay behavior. The behavior is similar to Product 1. ARIMA(p,0,q) models will be used.
# ACF/PACF of Product 2's remainder.
acf(weekly_decomp_pr2_add$random, na.action = na.pass, lag.max = 50)
pacf(weekly_decomp_pr2_add$random, na.action = na.pass, lag.max = 50)
ARIMA(p,0,0) models with different p values will be tried. After p=8 AIC values starts to increase.
# Pure AR grid search.
AIC(arima(weekly_decomp_pr2_add$random, order = c(1,0,0)))
## [1] 2303.525
AIC(arima(weekly_decomp_pr2_add$random, order = c(2,0,0)))
## [1] 2255.9
AIC(arima(weekly_decomp_pr2_add$random, order = c(3,0,0)))
## [1] 2229.862
AIC(arima(weekly_decomp_pr2_add$random, order = c(4,0,0)))
## [1] 2231.27
AIC(arima(weekly_decomp_pr2_add$random, order = c(5,0,0)))
## [1] 2195.715
AIC(arima(weekly_decomp_pr2_add$random, order = c(6,0,0)))
## [1] 2142.98
AIC(arima(weekly_decomp_pr2_add$random, order = c(7,0,0)))
## [1] 2137.768
AIC(arima(weekly_decomp_pr2_add$random, order = c(8,0,0)))
## [1] 2120.578
AIC(arima(weekly_decomp_pr2_add$random, order = c(9,0,0)))
## [1] 2122.574
ARIMA (0,0,q) models with different q values will be tried. After q=7 AIC values starts to increase
# Pure MA grid search.
AIC(arima(weekly_decomp_pr2_add$random, order = c(0,0,1)))
## [1] 2291.776
AIC(arima(weekly_decomp_pr2_add$random, order = c(0,0,2)))
## [1] 2210.957
AIC(arima(weekly_decomp_pr2_add$random, order = c(0,0,3)))
## [1] 2123.115
AIC(arima(weekly_decomp_pr2_add$random, order = c(0,0,4)))
## [1] 2118.695
AIC(arima(weekly_decomp_pr2_add$random, order = c(0,0,5)))
## [1] 2107.705
AIC(arima(weekly_decomp_pr2_add$random, order = c(0,0,6)))
## [1] 2101.97
AIC(arima(weekly_decomp_pr2_add$random, order = c(0,0,7)))
## [1] 2093.406
AIC(arima(weekly_decomp_pr2_add$random, order = c(0,0,8)))
## [1] 2094.712
Combining two models as ARIMA(8,0,7)
model2 = arima(weekly_decomp_pr2_add$random, order = c(8,0,7))
AIC(model2)
## [1] 2071.746
Fitting the model
# Fit on remainder scale, then add seasonal + trend back.
pr2_fitted = weekly_decomp_pr2_add$random - residuals(model2)
pr2_fitted_transformed = pr2_fitted+weekly_decomp_pr2_add$seasonal+weekly_decomp_pr2_add$trend
pr2_predictions=cbind(sold_count=weekly_pr2,fitted=pr2_fitted_transformed)
pr2_predictions=as.data.table(pr2_predictions)
pr2_predictions$date=product2$event_date
ggplot(pr2_predictions, aes(x=date, y=sold_count))+geom_line()+geom_line(aes(y=fitted),col="red")
## Warning: Removed 6 row(s) containing missing values (geom_path).
# MAPE is Inf here — presumably caused by zero sold_count days; verify accu().
accu_2 = accu(pr2_predictions$sold_count, pr2_predictions$fitted)
print(accu_2)
## n mean FBias MAPE RMSE MAD MADP WMAPE
## 1 372 11.01882 0.08929007 Inf 0.9919027 0.983871 0.08929007 0.08929007
Product 3 is decomposed additively on a weekly level. Random term is plotted below.
# Product 3: weekly additive decomposition.
weekly_pr3= ts(product3$sold_count, freq=7)
weekly_decomp_pr3_add = decompose(weekly_pr3)
plot(weekly_decomp_pr3_add$random)
The ACF plot of the random term shows sinusoidal behavior, as does the PACF plot. ARIMA(p,0,q) models will be used.
# ACF/PACF of Product 3's remainder.
acf(weekly_decomp_pr3_add$random, na.action = na.pass, lag.max = 50)
pacf(weekly_decomp_pr3_add$random, na.action = na.pass, lag.max = 50)
ARIMA(p,0,0) models with different p values will be tried. After p=10 AIC values starts to increase.
# Pure AR grid search.
AIC(arima(weekly_decomp_pr3_add$random, order = c(1,0,0)))
## [1] 3621.813
AIC(arima(weekly_decomp_pr3_add$random, order = c(2,0,0)))
## [1] 3579.341
AIC(arima(weekly_decomp_pr3_add$random, order = c(3,0,0)))
## [1] 3563.754
AIC(arima(weekly_decomp_pr3_add$random, order = c(4,0,0)))
## [1] 3560.052
AIC(arima(weekly_decomp_pr3_add$random, order = c(5,0,0)))
## [1] 3539.706
AIC(arima(weekly_decomp_pr3_add$random, order = c(6,0,0)))
## [1] 3531.944
AIC(arima(weekly_decomp_pr3_add$random, order = c(7,0,0)))
## [1] 3530.479
AIC(arima(weekly_decomp_pr3_add$random, order = c(8,0,0)))
## [1] 3522.776
AIC(arima(weekly_decomp_pr3_add$random, order = c(9,0,0)))
## [1] 3519.717
AIC(arima(weekly_decomp_pr3_add$random, order = c(10,0,0)))
## [1] 3521.508
AIC(arima(weekly_decomp_pr3_add$random, order = c(11,0,0)))
## [1] 3523.342
ARIMA (0,0,q) models with different q values will be tried. q=3 will be used since the decrease in AIC values is very small after 3.
# Pure MA grid search.
AIC(arima(weekly_decomp_pr3_add$random, order = c(0,0,1)))
## [1] 3607.011
AIC(arima(weekly_decomp_pr3_add$random, order = c(0,0,2)))
## [1] 3558.909
AIC(arima(weekly_decomp_pr3_add$random, order = c(0,0,3)))
## [1] 3489.457
AIC(arima(weekly_decomp_pr3_add$random, order = c(0,0,4)))
## [1] 3489.448
AIC(arima(weekly_decomp_pr3_add$random, order = c(0,0,5)))
## [1] 3488.446
Combining two models as ARIMA(10,0,3)
# NOTE(review): optim reported possible non-convergence for this fit;
# the AIC below should be interpreted with that caveat.
model3 = arima(weekly_decomp_pr3_add$random, order = c(10,0,3))
## Warning in arima(weekly_decomp_pr3_add$random, order = c(10, 0, 3)): possible
## convergence problem: optim gave code = 1
AIC(model3)
## [1] 3485.596
Fitting the model
# Fit on remainder scale, then add seasonal + trend back.
pr3_fitted = weekly_decomp_pr3_add$random - residuals(model3)
pr3_fitted_transformed = pr3_fitted+weekly_decomp_pr3_add$seasonal+weekly_decomp_pr3_add$trend
pr3_predictions=cbind(sold_count=weekly_pr3,fitted=pr3_fitted_transformed)
pr3_predictions=as.data.table(pr3_predictions)
pr3_predictions$date=product3$event_date
ggplot(pr3_predictions, aes(x=date, y=sold_count))+geom_line()+geom_line(aes(y=fitted),col="red")
## Warning: Removed 6 row(s) containing missing values (geom_path).
accu_3 = accu(pr3_predictions$sold_count, pr3_predictions$fitted)
print(accu_3)
## n mean FBias MAPE RMSE MAD MADP WMAPE
## 1 372 92.20968 0.01066993 Inf 0.9919027 0.983871 0.01066993 0.01066993
Product 4 is decomposed additively on a weekly level. Random term is plotted below.
# Product 4: weekly additive decomposition.
weekly_pr4= ts(product4$sold_count, freq=7)
weekly_decomp_pr4_add = decompose(weekly_pr4)
plot(weekly_decomp_pr4_add$random)
The ACF plot of the random term shows sinusoidal behavior and the PACF plot has spikes. There are significant spikes at lags 2 and 4, therefore ARIMA(2,0,0) and ARIMA(4,0,0) will be tried.
# ACF/PACF of Product 4's remainder.
acf(weekly_decomp_pr4_add$random, na.action = na.pass, lag.max = 50)
pacf(weekly_decomp_pr4_add$random, na.action = na.pass, lag.max = 50)
ARIMA(2,0,0)
AIC(arima(weekly_decomp_pr4_add$random, order = c(2,0,0)))
## [1] 5056.963
ARIMA(4,0,0)
AIC(arima(weekly_decomp_pr4_add$random, order = c(4,0,0)))
## [1] 5025.255
ARIMA(4,0,0) model will be used since it has lower AIC value.
ARIMA(0,0,q) models with different q values will be tried; among those tried, the smallest AIC value belongs to q=4.
# Pure MA grid search for Product 4.
AIC(arima(weekly_decomp_pr4_add$random, order = c(0,0,1)))
## [1] 5072.758
AIC(arima(weekly_decomp_pr4_add$random, order = c(0,0,2)))
## [1] 5074.541
AIC(arima(weekly_decomp_pr4_add$random, order = c(0,0,3)))
## [1] 4954.022
AIC(arima(weekly_decomp_pr4_add$random, order = c(0,0,4)))
## [1] 4952.369
AIC(arima(weekly_decomp_pr4_add$random, order = c(0,0,5)))
## [1] 4953.854
Combining two models as ARIMA(4,0,4)
model4 = arima(weekly_decomp_pr4_add$random, order = c(4,0,4))
AIC(model4)
## [1] 4910.234
Fitting the model
# Fit on remainder scale, then add seasonal + trend back.
pr4_fitted = weekly_decomp_pr4_add$random - residuals(model4)
pr4_fitted_transformed = pr4_fitted+weekly_decomp_pr4_add$seasonal+weekly_decomp_pr4_add$trend
pr4_predictions=cbind(sold_count=weekly_pr4,fitted=pr4_fitted_transformed)
pr4_predictions=as.data.table(pr4_predictions)
pr4_predictions$date=product4$event_date
ggplot(pr4_predictions, aes(x=date, y=sold_count))+geom_line()+geom_line(aes(y=fitted),col="red")
## Warning: Removed 6 row(s) containing missing values (geom_path).
accu_4 = accu(pr4_predictions$sold_count, pr4_predictions$fitted)
print(accu_4)
## n mean FBias MAPE RMSE MAD MADP
## 1 372 385.1452 0.002554546 0.004564582 0.9919027 0.983871 0.002554546
## WMAPE
## 1 0.002554546
Product 5 is decomposed additively on a weekly level. Random term is plotted below.
# Product 5: weekly additive decomposition.
weekly_pr5= ts(product5$sold_count, freq=7)
weekly_decomp_pr5_add = decompose(weekly_pr5)
plot(weekly_decomp_pr5_add$random)
The ACF plot of the random term has an early spike and shows sinusoidal behavior. The PACF plot shows exponential decay at some point and sinusoidal behavior after.
# ACF/PACF of Product 5's remainder.
acf(weekly_decomp_pr5_add$random, na.action = na.pass, lag.max = 50)
pacf(weekly_decomp_pr5_add$random, na.action = na.pass, lag.max = 50)
ARIMA(0,0,q) models with q values (1,2,3,4,5) will be tried. Smallest AIC value belongs to ARIMA(0,0,5)
# Pure MA grid search.
AIC(arima(weekly_decomp_pr5_add$random, order = c(0,0,1)))
## [1] 1779.282
AIC(arima(weekly_decomp_pr5_add$random, order = c(0,0,2)))
## [1] 1623.483
AIC(arima(weekly_decomp_pr5_add$random, order = c(0,0,3)))
## [1] 1622.628
AIC(arima(weekly_decomp_pr5_add$random, order = c(0,0,4)))
## [1] 1608.534
AIC(arima(weekly_decomp_pr5_add$random, order = c(0,0,5)))
## [1] 1548.643
ARIMA(p,0,5) models with different p values will be tried to see if there is a model with a smaller AIC.
# Add AR terms to the chosen MA(5); none improves on ARIMA(0,0,5).
AIC(arima(weekly_decomp_pr5_add$random, order = c(1,0,5)))
## [1] 1550.419
AIC(arima(weekly_decomp_pr5_add$random, order = c(3,0,5)))
## [1] 1552.771
AIC(arima(weekly_decomp_pr5_add$random, order = c(4,0,5)))
## [1] 1552.582
AIC(arima(weekly_decomp_pr5_add$random, order = c(5,0,5)))
## [1] 1553.968
All AIC values are bigger therefore ARIMA(0,0,5) will be used.
model5 = arima(weekly_decomp_pr5_add$random, order = c(0,0,5))
AIC(model5)
## [1] 1548.643
Fitting the model
# Fit on remainder scale, then add seasonal + trend back.
pr5_fitted = weekly_decomp_pr5_add$random - residuals(model5)
pr5_fitted_transformed = pr5_fitted+weekly_decomp_pr5_add$seasonal+weekly_decomp_pr5_add$trend
pr5_predictions=cbind(sold_count=weekly_pr5,fitted=pr5_fitted_transformed)
pr5_predictions=as.data.table(pr5_predictions)
pr5_predictions$date=product5$event_date
ggplot(pr5_predictions, aes(x=date, y=sold_count))+geom_line()+geom_line(aes(y=fitted),col="red")
## Warning: Removed 6 row(s) containing missing values (geom_path).
# MAPE is NaN here — presumably division by zero sold_count; verify accu().
accu_5 = accu(pr5_predictions$sold_count, pr5_predictions$fitted)
print(accu_5)
## n mean FBias MAPE RMSE MAD MADP WMAPE
## 1 372 0.8306452 1.184466 NaN 0.9919027 0.983871 1.184466 1.184466
Product 6 is decomposed additively on a weekly level. Random term is plotted below.
# Product 6: weekly additive decomposition (re-derived for the modeling step).
weekly_pr6= ts(product6$sold_count, freq=7)
weekly_decomp_pr6_add = decompose(weekly_pr6)
plot(weekly_decomp_pr6_add$random)
The ACF plot of the random term has an early spike and shows sinusoidal behavior. The PACF plot shows exponential decay.
# ACF/PACF of Product 6's remainder.
acf(weekly_decomp_pr6_add$random, na.action = na.pass, lag.max = 50)
pacf(weekly_decomp_pr6_add$random, na.action = na.pass, lag.max = 50)
ARIMA(0,0,q) models with q values (1,2,3,4,5) will be tried. ARIMA(0,0,3) model will be used since the decrease in AIC values gets smaller after that point.
# Pure MA grid search.
AIC(arima(weekly_decomp_pr6_add$random, order = c(0,0,1)))
## [1] 4552.394
AIC(arima(weekly_decomp_pr6_add$random, order = c(0,0,2)))
## [1] 4500.585
AIC(arima(weekly_decomp_pr6_add$random, order = c(0,0,3)))
## [1] 4422.014
AIC(arima(weekly_decomp_pr6_add$random, order = c(0,0,4)))
## [1] 4422.395
AIC(arima(weekly_decomp_pr6_add$random, order = c(0,0,5)))
## [1] 4415.996
ARIMA(p,0,3) models with different p values will be tried to see if there is a model with a smaller AIC. ARIMA(5,0,3) has the smallest AIC value.
# Add AR terms to the chosen MA(3); ARIMA(5,0,3) wins on AIC.
AIC(arima(weekly_decomp_pr6_add$random, order = c(1,0,3)))
## [1] 4423.024
AIC(arima(weekly_decomp_pr6_add$random, order = c(2,0,3)))
## [1] 4418.547
AIC(arima(weekly_decomp_pr6_add$random, order = c(4,0,3)))
## [1] 4411.558
AIC(arima(weekly_decomp_pr6_add$random, order = c(5,0,3)))
## [1] 4408.108
ARIMA(5,0,3) will be used.
model6 = arima(weekly_decomp_pr6_add$random, order = c(5,0,3))
AIC(model6)
## [1] 4408.108
Fitting the model
# Fit on remainder scale, then add seasonal + trend back.
pr6_fitted = weekly_decomp_pr6_add$random - residuals(model6)
pr6_fitted_transformed = pr6_fitted+weekly_decomp_pr6_add$seasonal+weekly_decomp_pr6_add$trend
pr6_predictions=cbind(sold_count=weekly_pr6,fitted=pr6_fitted_transformed)
pr6_predictions=as.data.table(pr6_predictions)
pr6_predictions$date=product6$event_date
ggplot(pr6_predictions, aes(x=date, y=sold_count))+geom_line()+geom_line(aes(y=fitted),col="red")
## Warning: Removed 6 row(s) containing missing values (geom_path).
accu_6 = accu(pr6_predictions$sold_count, pr6_predictions$fitted)
print(accu_6)
## n mean FBias MAPE RMSE MAD MADP
## 1 372 392.0323 0.002509668 0.003424822 0.9919027 0.983871 0.002509668
## WMAPE
## 1 0.002509668
Product 7 is decomposed additively on a weekly level. Random term is plotted below.
# Product 7: weekly additive decomposition (re-derived for the modeling step).
weekly_pr7= ts(product7$sold_count, freq=7)
weekly_decomp_pr7_add = decompose(weekly_pr7)
plot(weekly_decomp_pr7_add$random)
The ACF plot of the random term has a spike at around lag 2.5. The PACF plot shows exponential decay.
# ACF/PACF of Product 7's remainder.
acf(weekly_decomp_pr7_add$random, na.action = na.pass, lag.max = 50)
pacf(weekly_decomp_pr7_add$random, na.action = na.pass, lag.max = 50)
ARIMA(0,0,q) models with q values (1,2,3,4,5) will be tried. ARIMA(0,0,4) model will be used because AIC values starts to increase.
# Pure MA grid search.
AIC(arima(weekly_decomp_pr7_add$random, order = c(0,0,1)))
## [1] 3225.885
AIC(arima(weekly_decomp_pr7_add$random, order = c(0,0,2)))
## [1] 3220.683
AIC(arima(weekly_decomp_pr7_add$random, order = c(0,0,3)))
## [1] 3132.23
AIC(arima(weekly_decomp_pr7_add$random, order = c(0,0,4)))
## [1] 3109.986
AIC(arima(weekly_decomp_pr7_add$random, order = c(0,0,5)))
## [1] 3111.038
Next, AR terms are added by trying ARIMA(p,0,4) models to see if there is a model with a smaller AIC. The ARIMA(2,0,4) model has the smallest AIC value.
# Add AR terms to the MA(4) model and compare AICs; p = 2 gives the lowest value.
AIC(arima(weekly_decomp_pr7_add$random, order = c(1,0,4)))
## [1] 3111.557
AIC(arima(weekly_decomp_pr7_add$random, order = c(2,0,4)))
## [1] 3064.732
AIC(arima(weekly_decomp_pr7_add$random, order = c(3,0,4)))
## [1] 3082.122
AIC(arima(weekly_decomp_pr7_add$random, order = c(4,0,4)))
## [1] 3073.936
ARIMA(2,0,4) will be used.
# Fit the chosen ARIMA(2,0,4) model to the product 7 random component.
model7 = arima(weekly_decomp_pr7_add$random, order = c(2,0,4))
AIC(model7)
## [1] 3064.732
Fitting the model
# Fitted random term, then add seasonal and trend back to reach the sold-count scale.
pr7_fitted = weekly_decomp_pr7_add$random - residuals(model7)
pr7_fitted_transformed = pr7_fitted+weekly_decomp_pr7_add$seasonal+weekly_decomp_pr7_add$trend
pr7_predictions=cbind(sold_count=weekly_pr7,fitted=pr7_fitted_transformed)
pr7_predictions=as.data.table(pr7_predictions)
pr7_predictions$date=product7$event_date
# Black: observed sold count; red: fitted. NA edges from decompose() cause the warning.
ggplot(pr7_predictions, aes(x=date, y=sold_count))+geom_line()+geom_line(aes(y=fitted),col="red")
## Warning: Removed 6 row(s) containing missing values (geom_path).
accu_7 = accu(pr7_predictions$sold_count, pr7_predictions$fitted)
print(accu_7)
## n mean FBias MAPE RMSE MAD MADP WMAPE
## 1 372 39.72581 0.02476654 0.04023471 0.9919027 0.983871 0.02476654 0.02476654
Product 8 is decomposed additively on a weekly level. Random term is plotted below.
# Additive decomposition of product 8 sold counts at weekly frequency; plot the random term.
weekly_pr8= ts(product8$sold_count, freq=7)
weekly_decomp_pr8_add = decompose(weekly_pr8)
plot(weekly_decomp_pr8_add$random)
The ACF plot of the random term shows sinusoidal behavior. The PACF plot has spikes around lags 1 and 1.5 and none beyond.
# ACF/PACF of the random term; na.pass keeps the NA edges produced by decompose().
acf(weekly_decomp_pr8_add$random, na.action = na.pass, lag.max = 50)
pacf(weekly_decomp_pr8_add$random, na.action = na.pass, lag.max = 50)
ARIMA(p,0,0) models with different p values will be tried. The ARIMA(9,0,0) model will be used because AIC values start to increase afterwards.
# Compare AIC across pure-AR ARIMA(p,0,0) candidates for the product 8 random component.
AIC(arima(weekly_decomp_pr8_add$random, order = c(1,0,0)))
## [1] 2657.071
AIC(arima(weekly_decomp_pr8_add$random, order = c(2,0,0)))
## [1] 2617.024
AIC(arima(weekly_decomp_pr8_add$random, order = c(3,0,0)))
## [1] 2613.855
AIC(arima(weekly_decomp_pr8_add$random, order = c(4,0,0)))
## [1] 2607.959
AIC(arima(weekly_decomp_pr8_add$random, order = c(5,0,0)))
## [1] 2564.091
AIC(arima(weekly_decomp_pr8_add$random, order = c(6,0,0)))
## [1] 2557.697
AIC(arima(weekly_decomp_pr8_add$random, order = c(7,0,0)))
## [1] 2549.243
AIC(arima(weekly_decomp_pr8_add$random, order = c(8,0,0)))
## [1] 2546.981
AIC(arima(weekly_decomp_pr8_add$random, order = c(9,0,0)))
## [1] 2530.835
AIC(arima(weekly_decomp_pr8_add$random, order = c(10,0,0)))
## [1] 2532.442
Next, MA terms are added by trying ARIMA(9,0,q) models to see if there is a model with a smaller AIC. The ARIMA(9,0,3) model has the smallest AIC value.
# Add MA terms to the AR(9) model and compare AICs; q = 3 gives the lowest value.
AIC(arima(weekly_decomp_pr8_add$random, order = c(9,0,1)))
## [1] 2531.421
AIC(arima(weekly_decomp_pr8_add$random, order = c(9,0,2)))
## [1] 2509.796
AIC(arima(weekly_decomp_pr8_add$random, order = c(9,0,3)))
## [1] 2504.486
AIC(arima(weekly_decomp_pr8_add$random, order = c(9,0,4)))
## [1] 2506.455
ARIMA(9,0,3) will be used.
# Fit the chosen ARIMA(9,0,3) model to the product 8 random component.
model8 = arima(weekly_decomp_pr8_add$random, order = c(9,0,3))
AIC(model8)
## [1] 2504.486
Fitting the model
# Fitted random term, then add seasonal and trend back to reach the sold-count scale.
pr8_fitted = weekly_decomp_pr8_add$random - residuals(model8)
pr8_fitted_transformed = pr8_fitted+weekly_decomp_pr8_add$seasonal+weekly_decomp_pr8_add$trend
pr8_predictions=cbind(sold_count=weekly_pr8,fitted=pr8_fitted_transformed)
pr8_predictions=as.data.table(pr8_predictions)
pr8_predictions$date=product8$event_date
ggplot(pr8_predictions, aes(x=date, y=sold_count))+geom_line()+geom_line(aes(y=fitted),col="red")
## Warning: Removed 6 row(s) containing missing values (geom_path).
# NOTE(review): MAPE is NaN below, which indicates division by zero-valued sold
# counts for product 8 on some days; WMAPE/MAD are the more usable metrics here.
accu_8 = accu(pr8_predictions$sold_count, pr8_predictions$fitted)
print(accu_8)
## n mean FBias MAPE RMSE MAD MADP WMAPE
## 1 372 15.79301 0.06229787 NaN 0.9919027 0.983871 0.06229787 0.06229787
Product 9 is decomposed additively on a weekly level. Random term is plotted below.
# Additive decomposition of product 9 sold counts at weekly frequency; plot the random term.
weekly_pr9= ts(product9$sold_count, freq=7)
weekly_decomp_pr9_add = decompose(weekly_pr9)
plot(weekly_decomp_pr9_add$random)
The ACF plot of the random term shows sinusoidal behavior. The PACF plot has spikes until lag 2.
# ACF/PACF of the random term; na.pass keeps the NA edges produced by decompose().
acf(weekly_decomp_pr9_add$random, na.action = na.pass, lag.max = 50)
pacf(weekly_decomp_pr9_add$random, na.action = na.pass, lag.max = 50)
ARIMA(p,0,0) models with different p values will be tried. The ARIMA(8,0,0) model will be used because AIC values start to increase afterwards.
# Compare AIC across pure-AR ARIMA(p,0,0) candidates for the product 9 random component.
AIC(arima(weekly_decomp_pr9_add$random, order = c(1,0,0)))
## [1] 3579.892
AIC(arima(weekly_decomp_pr9_add$random, order = c(2,0,0)))
## [1] 3570.061
AIC(arima(weekly_decomp_pr9_add$random, order = c(3,0,0)))
## [1] 3532.722
AIC(arima(weekly_decomp_pr9_add$random, order = c(4,0,0)))
## [1] 3530.453
AIC(arima(weekly_decomp_pr9_add$random, order = c(5,0,0)))
## [1] 3526.35
AIC(arima(weekly_decomp_pr9_add$random, order = c(6,0,0)))
## [1] 3521.961
AIC(arima(weekly_decomp_pr9_add$random, order = c(7,0,0)))
## [1] 3507.079
AIC(arima(weekly_decomp_pr9_add$random, order = c(8,0,0)))
## [1] 3503.417
AIC(arima(weekly_decomp_pr9_add$random, order = c(9,0,0)))
## [1] 3504.501
AIC(arima(weekly_decomp_pr9_add$random, order = c(10,0,0)))
## [1] 3506.491
Next, MA terms are added by trying ARIMA(8,0,q) models to see if there is a model with a smaller AIC. The ARIMA(8,0,5) model has the smallest AIC value.
# Add MA terms to the AR(8) model and compare AICs; q = 5 gives the lowest value.
AIC(arima(weekly_decomp_pr9_add$random, order = c(8,0,1)))
## [1] 3443.978
AIC(arima(weekly_decomp_pr9_add$random, order = c(8,0,2)))
## [1] 3445.951
AIC(arima(weekly_decomp_pr9_add$random, order = c(8,0,4)))
## [1] 3443.681
AIC(arima(weekly_decomp_pr9_add$random, order = c(8,0,5)))
## [1] 3429.3
AIC(arima(weekly_decomp_pr9_add$random, order = c(8,0,6)))
## [1] 3444.711
ARIMA(8,0,5) will be used.
# Fit the chosen ARIMA(8,0,5) model to the product 9 random component.
model9 = arima(weekly_decomp_pr9_add$random, order = c(8,0,5))
AIC(model9)
## [1] 3429.3
Fitting the model
# Fitted random term, then add seasonal and trend back to reach the sold-count scale.
pr9_fitted = weekly_decomp_pr9_add$random - residuals(model9)
pr9_fitted_transformed = pr9_fitted+weekly_decomp_pr9_add$seasonal+weekly_decomp_pr9_add$trend
pr9_predictions=cbind(sold_count=weekly_pr9,fitted=pr9_fitted_transformed)
pr9_predictions=as.data.table(pr9_predictions)
pr9_predictions$date=product9$event_date
# Black: observed sold count; red: fitted. NA edges from decompose() cause the warning.
ggplot(pr9_predictions, aes(x=date, y=sold_count))+geom_line()+geom_line(aes(y=fitted),col="red")
## Warning: Removed 6 row(s) containing missing values (geom_path).
accu_9 = accu(pr9_predictions$sold_count, pr9_predictions$fitted)
print(accu_9)
## n mean FBias MAPE RMSE MAD MADP WMAPE
## 1 372 74.19624 0.01326039 0.02099574 0.9919027 0.983871 0.01326039 0.01326039
There are 11 regressors in the data sets for each product.
Price column denotes the price of the product on the given day. Changes in price could be useful for forecasting sales counts. However, there are missing observations in this column. Let’s check the correlation of sales count and price for each product. Several of the correlation calls return NA due to missing data, so price cannot be used as a regressor for those products. For Product 4 and Product 6, price has some correlation with sold count, even though it is not very strong. This can be observed from both the plots and the correlation coefficients.
# Scatter plots and Pearson correlations of price vs. sold count, per product.
# NOTE(review): cor() returns NA whenever a column contains missing values;
# cor(..., use = "complete.obs") would still yield a coefficient for products
# 2, 3, 5 and 8 — confirm whether price was meant to be dismissed for them.
(ggplot(data=product1, aes(x=price, y=sold_count))+geom_point()+
theme_minimal()+
labs(title="Product 1",y="Sold Count", x="Price"))
cor(product1$price, product1$sold_count)
## [1] -0.2583435
(ggplot(data=product2, aes(x=price, y=sold_count))+geom_point()+
theme_minimal()+
labs(title="Product 2",y="Sold Count", x="Price"))
cor(product2$price, product2$sold_count)
## [1] NA
(ggplot(data=product3, aes(x=price, y=sold_count))+geom_point()+
theme_minimal()+
labs(title="Product 3",y="Sold Count", x="Price"))
cor(product3$price, product3$sold_count)
## [1] NA
(ggplot(data=product4, aes(x=price, y=sold_count))+geom_point()+
theme_minimal()+
labs(title="Product 4",y="Sold Count", x="Price"))
cor(product4$price, product4$sold_count)
## [1] -0.5721201
(ggplot(data=product5, aes(x=price, y=sold_count))+geom_point()+
theme_minimal()+
labs(title="Product 5",y="Sold Count", x="Price"))
cor(product5$price, product5$sold_count)
## [1] NA
(ggplot(data=product6, aes(x=price, y=sold_count))+geom_point()+
theme_minimal()+
labs(title="Product 6",y="Sold Count", x="Price"))
cor(product6$price, product6$sold_count)
## [1] -0.5127488
(ggplot(data=product7, aes(x=price, y=sold_count))+geom_point()+
theme_minimal()+
labs(title="Product 7",y="Sold Count", x="Price"))
cor(product7$price, product7$sold_count)
## [1] -0.32637
(ggplot(data=product8, aes(x=price, y=sold_count))+geom_point()+
theme_minimal()+
labs(title="Product 8",y="Sold Count", x="Price"))
cor(product8$price, product8$sold_count)
## [1] NA
(ggplot(data=product9, aes(x=price, y=sold_count))+geom_point()+
theme_minimal()+
labs(title="Product 9",y="Sold Count", x="Price"))
cor(product9$price, product9$sold_count)
## [1] -0.2299915
Visit count is 0 for most of the products until about February 2021. By visual inspection there is no correlation observed, except for Product 8, which also has 0 sold count until about February 2021. Visit count can be used as a regressor for Product 8.
# Faceted time-series plots of visit count and sold count per product for visual comparison.
ggplot(data=my_data)+geom_line(aes(x=event_date, y=visit_count))+facet_grid(rows="product", scales = "free")+ theme_minimal()+
labs(title="Visit Counts")
ggplot(data=my_data)+geom_line(aes(x=event_date, y=sold_count))+facet_grid(rows="product", scales ="free")+ theme_minimal()+
labs(title="Sold Counts")
Correlation between visit count and sold count for product 8 shows that two variables are highly correlated.
# Product 8 only: scatter plot and correlation of visit count vs. sold count.
(ggplot(data=product8, aes(x=visit_count, y=sold_count))+geom_point()+
theme_minimal()+
labs(title="Product 8",y="Sold Count", x="Visit Count"))
cor(product8$visit_count, product8$sold_count)
## [1] 0.8969791
By visual inspection for almost all products basket count and sold count seems to be correlated.
# Faceted time-series plots, then per-product scatter plots and correlations of
# basket count vs. sold count. All coefficients are above 0.8.
ggplot(data=my_data)+geom_line(aes(x=event_date, y=basket_count))+facet_grid(rows="product", scales = "free")+ theme_minimal()+
labs(title="Basket Counts")
ggplot(data=my_data)+geom_line(aes(x=event_date, y=sold_count))+facet_grid(rows="product", scales ="free")+ theme_minimal()+
labs(title="Sold Counts")
To check plots and correlation coefficients
(ggplot(data=product1, aes(x=basket_count, y=sold_count))+geom_point()+
theme_minimal()+
labs(title="Product 1",y="Sold Count", x="Basket Count"))
cor(product1$basket_count, product1$sold_count)
## [1] 0.8372097
(ggplot(data=product2, aes(x=basket_count, y=sold_count))+geom_point()+
theme_minimal()+
labs(title="Product 2",y="Sold Count", x="Basket Count"))
cor(product2$basket_count, product2$sold_count)
## [1] 0.95016
(ggplot(data=product3, aes(x=basket_count, y=sold_count))+geom_point()+
theme_minimal()+
labs(title="Product 3",y="Sold Count", x="Basket Count"))
cor(product3$basket_count, product3$sold_count)
## [1] 0.9515283
(ggplot(data=product4, aes(x=basket_count, y=sold_count))+geom_point()+
theme_minimal()+
labs(title="Product 4",y="Sold Count", x="Basket Count"))
cor(product4$basket_count, product4$sold_count)
## [1] 0.8872024
(ggplot(data=product5, aes(x=basket_count, y=sold_count))+geom_point()+
theme_minimal()+
labs(title="Product 5",y="Sold Count", x="Basket Count"))
cor(product5$basket_count, product5$sold_count)
## [1] 0.9011925
(ggplot(data=product6, aes(x=basket_count, y=sold_count))+geom_point()+
theme_minimal()+
labs(title="Product 6",y="Sold Count", x="Basket Count"))
cor(product6$basket_count, product6$sold_count)
## [1] 0.8656776
(ggplot(data=product7, aes(x=basket_count, y=sold_count))+geom_point()+
theme_minimal()+
labs(title="Product 7",y="Sold Count", x="Basket Count"))
cor(product7$basket_count, product7$sold_count)
## [1] 0.8668665
(ggplot(data=product8, aes(x=basket_count, y=sold_count))+geom_point()+
theme_minimal()+
labs(title="Product 8",y="Sold Count", x="Basket Count"))
cor(product8$basket_count, product8$sold_count)
## [1] 0.980521
(ggplot(data=product9, aes(x=basket_count, y=sold_count))+geom_point()+
theme_minimal()+
labs(title="Product 9",y="Sold Count", x="Basket Count"))
cor(product9$basket_count, product9$sold_count)
## [1] 0.8194234
As observed in previous plots basket count could be used as a regressor for all products.
Similar to visit count, favored count is 0 for all products until the beginning of 2021. By visual inspection there is not much correlation between sold count and favored count.
# Faceted time-series plots and per-product correlations of favored count vs. sold count.
ggplot(data=my_data)+geom_line(aes(x=event_date, y=favored_count))+facet_grid(rows="product", scales = "free")+ theme_minimal()+
labs(title="Favored Count")
ggplot(data=my_data)+geom_line(aes(x=event_date, y=sold_count))+facet_grid(rows="product", scales ="free")+ theme_minimal()+
labs(title="Sold Counts")
Correlation coefficients are computed as
cor(product1$favored_count, product1$sold_count)
## [1] 0.1687429
cor(product2$favored_count, product2$sold_count)
## [1] 0.7257766
cor(product3$favored_count, product3$sold_count)
## [1] 0.7896215
cor(product4$favored_count, product4$sold_count)
## [1] 0.2832415
cor(product5$favored_count, product5$sold_count)
## [1] 0.03265166
cor(product6$favored_count, product6$sold_count)
## [1] 0.2280804
cor(product7$favored_count, product7$sold_count)
## [1] -0.1450431
cor(product8$favored_count, product8$sold_count)
## [1] 0.776343
cor(product9$favored_count, product9$sold_count)
## [1] 0.4509207
Only for products 2, 3 and 8 is the favored count correlation coefficient high. This may be due to the fact that sold counts for those products are mainly high starting from the winter of 2021. This variable will not be used as a regressor for any product.
By visual inspection correlation between category sold and sold count is possible.
# Faceted time-series plots and per-product correlations of category sold vs. sold count.
ggplot(data=my_data)+geom_line(aes(x=event_date, y=category_sold))+facet_grid(rows="product", scales = "free")+ theme_minimal()+
labs(title="Category Sold")
ggplot(data=my_data)+geom_line(aes(x=event_date, y=sold_count))+facet_grid(rows="product", scales ="free")+ theme_minimal()+
labs(title="Sold Counts")
The numeric results are
cor(product1$category_sold, product1$sold_count)
## [1] 0.9000466
cor(product2$category_sold, product2$sold_count)
## [1] 0.7642985
cor(product3$category_sold, product3$sold_count)
## [1] 0.3998297
cor(product4$category_sold, product4$sold_count)
## [1] 0.9157251
cor(product5$category_sold, product5$sold_count)
## [1] 0.1772956
cor(product6$category_sold, product6$sold_count)
## [1] 0.5323229
cor(product7$category_sold, product7$sold_count)
## [1] 0.7591626
cor(product8$category_sold, product8$sold_count)
## [1] 0.763763
cor(product9$category_sold, product9$sold_count)
## [1] 0.6230038
For products 1, 2, 4, 7, and 8 category sold could be used as a regressor.
Since category visit counts are very large it is not possible to observe visually.
# Faceted time-series plots of category visits and sold count per product.
ggplot(data=my_data)+geom_line(aes(x=event_date, y=category_visits))+facet_grid(rows="product", scales = "free")+ theme_minimal()+
labs(title="Category Visits")
ggplot(data=my_data)+geom_line(aes(x=event_date, y=sold_count))+facet_grid(rows="product", scales ="free")+ theme_minimal()+
labs(title="Sold Counts")
Numeric correlation coefficients are checked and there is no significant correlation.
# Per-product correlations of category visits vs. sold count; all are weak.
cor(product1$category_visits, product1$sold_count)
## [1] 0.05368907
cor(product2$category_visits, product2$sold_count)
## [1] 0.2285581
cor(product3$category_visits, product3$sold_count)
## [1] 0.1229495
cor(product4$category_visits, product4$sold_count)
## [1] 0.4285587
cor(product5$category_visits, product5$sold_count)
## [1] 0.09676247
cor(product6$category_visits, product6$sold_count)
## [1] 0.01179411
cor(product7$category_visits, product7$sold_count)
## [1] 0.004137553
cor(product8$category_visits, product8$sold_count)
## [1] 0.4286554
cor(product9$category_visits, product9$sold_count)
## [1] 0.1228725
Category basket values are 0 for all products until February of 2021.
# Faceted time-series plots of category basket and sold count per product.
ggplot(data=my_data)+geom_line(aes(x=event_date, y=category_basket))+facet_grid(rows="product", scales = "free")+ theme_minimal()+
labs(title="Category Basket")
ggplot(data=my_data)+geom_line(aes(x=event_date, y=sold_count))+facet_grid(rows="product", scales ="free")+ theme_minimal()+
labs(title="Sold Counts")
Numeric correlation coefficients are checked; most are weak, although products 2 and 8 show moderately high correlation.
# Per-product correlations of category basket vs. sold count.
cor(product1$category_basket, product1$sold_count)
## [1] -0.08685947
cor(product2$category_basket, product2$sold_count)
## [1] 0.7458757
cor(product3$category_basket, product3$sold_count)
## [1] 0.5771166
cor(product4$category_basket, product4$sold_count)
## [1] 0.1962214
cor(product5$category_basket, product5$sold_count)
## [1] -0.09776011
cor(product6$category_basket, product6$sold_count)
## [1] 0.06057578
cor(product7$category_basket, product7$sold_count)
## [1] -0.1224585
cor(product8$category_basket, product8$sold_count)
## [1] 0.7743248
cor(product9$category_basket, product9$sold_count)
## [1] 0.2880863
It seems like there is some correlation between sold count and category basket for some products. This may be due to the fact that sold counts of those products are high in the winter of 2021 and low for other periods which corresponds with the category basket. This variable will not be used as a regressor.
Category brand sold values are also 0 for all products until 2021. This is the same issue as with category basket and favored count, so this variable will not be used as a regressor for any product.
# Faceted time-series plots of category brand sold and sold count per product.
ggplot(data=my_data)+geom_line(aes(x=event_date, y=category_brand_sold))+facet_grid(rows="product", scales = "free")+ theme_minimal()+
labs(title="Category Brand Sold")
ggplot(data=my_data)+geom_line(aes(x=event_date, y=sold_count))+facet_grid(rows="product", scales ="free")+ theme_minimal()+
labs(title="Sold Counts")
TY visits are 0 for all products until February of 2021. This variable also will not be used as a regressor.
# Time-series plot of platform-wide TY visits (not faceted: it is the same series
# for every product) next to per-product sold counts.
ggplot(data=my_data)+geom_line(aes(x=event_date, y=ty_visits))+theme_minimal()+
labs(title="TY VISITS")
ggplot(data=my_data)+geom_line(aes(x=event_date, y=sold_count))+facet_grid(rows="product", scales ="free")+ theme_minimal()+
labs(title="Sold Counts")
Possible regressors defined in the previous section are:
Product 1 : basket_count and category_sold
Product 2 : basket_count and category_sold
Product 3 : basket_count
Product 4 : basket_count and category_sold
Product 5 : basket_count
Product 6 : price and basket_count
Product 7 : basket_count and category_sold
Product 8 : visit_count, basket_count and category_sold
Product 9 : basket_count
Above variables will be used to model residuals of ARIMA models proposed in previous section.
# Regress product 1's ARIMA residuals on its chosen regressors
# (basket_count, category_sold), training on data before 2021-05-22.
pr1_predictions[, residuals:=sold_count-fitted]
product1$residuals=pr1_predictions$residuals
product1$arima=pr1_predictions$fitted
pr1_lm = lm(residuals~basket_count + category_sold, data=product1[event_date<"2021-05-22"])
pr1_lm
##
## Call:
## lm(formula = residuals ~ basket_count + category_sold, data = product1[event_date <
## "2021-05-22"])
##
## Coefficients:
## (Intercept) basket_count category_sold
## -53.52813 -0.06122 0.16851
# Bug fix: predict.lm's argument is 'newdata', not 'new_data'. The misspelled
# name was silently absorbed by '...', so predict() returned fitted values for
# the filtered training subset instead of predictions for all of product1.
product1 = cbind(product1, res_pred=predict(pr1_lm, newdata=product1))
# Combined forecast = regression prediction of the residual + ARIMA fitted value.
product1[, fitted:= res_pred+arima]
# Plot combined fit, observed sold count and plain ARIMA fit over the final week.
ggplot(data=product1[event_date<="2021-05-28"&event_date>="2021-05-22",], aes(x=event_date, y=fitted, col="fitted"))+geom_line()+geom_line(data=product1[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=sold_count, col="sold_count"))+geom_line(data=product1[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=arima, col="arima"))
# Regress product 2's ARIMA residuals on basket_count and category_sold.
pr2_predictions[, residuals:=sold_count-fitted]
product2$residuals=pr2_predictions$residuals
product2$arima=pr2_predictions$fitted
pr2_lm = lm(residuals~basket_count + category_sold, data=product2[event_date<"2021-05-22"])
pr2_lm
##
## Call:
## lm(formula = residuals ~ basket_count + category_sold, data = product2[event_date <
## "2021-05-22"])
##
## Coefficients:
## (Intercept) basket_count category_sold
## -0.4466786 0.0030806 0.0001691
# Bug fix: 'newdata' (not 'new_data') — the misspelled argument was ignored by
# predict.lm, yielding training-set fitted values rather than full-data predictions.
product2 = cbind(product2, res_pred=predict(pr2_lm, newdata=product2))
# Combined forecast = regression prediction of the residual + ARIMA fitted value.
product2[, fitted:= res_pred+arima]
ggplot(data=product2[event_date<="2021-05-28"&event_date>="2021-05-22",], aes(x=event_date, y=fitted, col="fitted"))+geom_line()+geom_line(data=product2[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=sold_count, col="sold_count"))+geom_line(data=product2[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=arima, col="arima"))
# Regress product 3's ARIMA residuals on basket_count.
pr3_predictions[, residuals:=sold_count-fitted]
product3$residuals=pr3_predictions$residuals
product3$arima=pr3_predictions$fitted
pr3_lm = lm(residuals~basket_count, data=product3[event_date<"2021-05-22"])
pr3_lm
##
## Call:
## lm(formula = residuals ~ basket_count, data = product3[event_date <
## "2021-05-22"])
##
## Coefficients:
## (Intercept) basket_count
## -6.19110 0.01423
# Bug fix: 'newdata' (not 'new_data') — the misspelled argument was ignored by
# predict.lm, yielding training-set fitted values rather than full-data predictions.
product3 = cbind(product3, res_pred=predict(pr3_lm, newdata=product3))
# Combined forecast = regression prediction of the residual + ARIMA fitted value.
product3[, fitted:= res_pred+arima]
ggplot(data=product3[event_date<="2021-05-28"&event_date>="2021-05-22",], aes(x=event_date, y=fitted, col="fitted"))+geom_line()+geom_line(data=product3[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=sold_count, col="sold_count"))+geom_line(data=product3[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=arima, col="arima"))
# Regress product 4's ARIMA residuals on basket_count and category_sold.
pr4_predictions[, residuals:=sold_count-fitted]
product4$residuals=pr4_predictions$residuals
product4$arima=pr4_predictions$fitted
pr4_lm = lm(residuals~basket_count + category_sold, data=product4[event_date<"2021-05-22"])
pr4_lm
##
## Call:
## lm(formula = residuals ~ basket_count + category_sold, data = product4[event_date <
## "2021-05-22"])
##
## Coefficients:
## (Intercept) basket_count category_sold
## -50.64080 -0.01864 0.04332
# Bug fix: 'newdata' (not 'new_data') — the misspelled argument was ignored by
# predict.lm, yielding training-set fitted values rather than full-data predictions.
product4 = cbind(product4, res_pred=predict(pr4_lm, newdata=product4))
# Combined forecast = regression prediction of the residual + ARIMA fitted value.
product4[, fitted:= res_pred+arima]
ggplot(data=product4[event_date<="2021-05-28"&event_date>="2021-05-22",], aes(x=event_date, y=fitted, col="fitted"))+geom_line()+geom_line(data=product4[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=sold_count, col="sold_count"))+geom_line(data=product4[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=arima, col="arima"))
# Regress product 5's ARIMA residuals on basket_count.
pr5_predictions[, residuals:=sold_count-fitted]
product5$residuals=pr5_predictions$residuals
product5$arima=pr5_predictions$fitted
pr5_lm = lm(residuals~basket_count, data=product5[event_date<"2021-05-22"])
pr5_lm
##
## Call:
## lm(formula = residuals ~ basket_count, data = product5[event_date <
## "2021-05-22"])
##
## Coefficients:
## (Intercept) basket_count
## -0.20615 0.04331
# Bug fix: 'newdata' (not 'new_data') — the misspelled argument was ignored by
# predict.lm, yielding training-set fitted values rather than full-data predictions.
product5 = cbind(product5, res_pred=predict(pr5_lm, newdata=product5))
# Combined forecast = regression prediction of the residual + ARIMA fitted value.
product5[, fitted:= res_pred+arima]
ggplot(data=product5[event_date<="2021-05-28"&event_date>="2021-05-22",], aes(x=event_date, y=fitted, col="fitted"))+geom_line()+geom_line(data=product5[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=sold_count, col="sold_count"))+geom_line(data=product5[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=arima, col="arima"))
# Regress product 6's ARIMA residuals on basket_count and price.
pr6_predictions[, residuals:=sold_count-fitted]
product6$residuals=pr6_predictions$residuals
product6$arima=pr6_predictions$fitted
pr6_lm = lm(residuals~basket_count + price, data=product6[event_date<"2021-05-22"])
pr6_lm
##
## Call:
## lm(formula = residuals ~ basket_count + price, data = product6[event_date <
## "2021-05-22"])
##
## Coefficients:
## (Intercept) basket_count price
## -100.38798 0.04621 0.19426
# Bug fix: 'newdata' (not 'new_data') — the misspelled argument was ignored by
# predict.lm, yielding training-set fitted values rather than full-data predictions.
product6 = cbind(product6, res_pred=predict(pr6_lm, newdata=product6))
# Combined forecast = regression prediction of the residual + ARIMA fitted value.
product6[, fitted:= res_pred+arima]
ggplot(data=product6[event_date<="2021-05-28"&event_date>="2021-05-22",], aes(x=event_date, y=fitted, col="fitted"))+geom_line()+geom_line(data=product6[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=sold_count, col="sold_count"))+geom_line(data=product6[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=arima, col="arima"))
# Regress product 7's ARIMA residuals on basket_count and category_sold.
pr7_predictions[, residuals:=sold_count-fitted]
product7$residuals=pr7_predictions$residuals
product7$arima=pr7_predictions$fitted
pr7_lm = lm(residuals~basket_count + category_sold, data=product7[event_date<"2021-05-22"])
pr7_lm
##
## Call:
## lm(formula = residuals ~ basket_count + category_sold, data = product7[event_date <
## "2021-05-22"])
##
## Coefficients:
## (Intercept) basket_count category_sold
## -4.31413 -0.03833 0.05830
# Bug fix: 'newdata' (not 'new_data') — the misspelled argument was ignored by
# predict.lm, yielding training-set fitted values rather than full-data predictions.
product7 = cbind(product7, res_pred=predict(pr7_lm, newdata=product7))
# Combined forecast = regression prediction of the residual + ARIMA fitted value.
product7[, fitted:= res_pred+arima]
ggplot(data=product7[event_date<="2021-05-28"&event_date>="2021-05-22",], aes(x=event_date, y=fitted, col="fitted"))+geom_line()+geom_line(data=product7[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=sold_count, col="sold_count"))+geom_line(data=product7[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=arima, col="arima"))
# Regress product 8's ARIMA residuals on basket_count, category_sold and visit_count.
pr8_predictions[, residuals:=sold_count-fitted]
product8$residuals=pr8_predictions$residuals
product8$arima=pr8_predictions$fitted
pr8_lm = lm(residuals~basket_count + category_sold+visit_count, data=product8[event_date<"2021-05-22"])
pr8_lm
##
## Call:
## lm(formula = residuals ~ basket_count + category_sold + visit_count,
## data = product8[event_date < "2021-05-22"])
##
## Coefficients:
## (Intercept) basket_count category_sold visit_count
## 0.3212686 0.0045766 -0.0007774 0.0001670
# Bug fix: 'newdata' (not 'new_data') — the misspelled argument was ignored by
# predict.lm, yielding training-set fitted values rather than full-data predictions.
product8 = cbind(product8, res_pred=predict(pr8_lm, newdata=product8))
# Combined forecast = regression prediction of the residual + ARIMA fitted value.
product8[, fitted:= res_pred+arima]
ggplot(data=product8[event_date<="2021-05-28"&event_date>="2021-05-22",], aes(x=event_date, y=fitted, col="fitted"))+geom_line()+geom_line(data=product8[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=sold_count, col="sold_count"))+geom_line(data=product8[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=arima, col="arima"))
# Regress product 9's ARIMA residuals on basket_count.
pr9_predictions[, residuals:=sold_count-fitted]
product9$residuals=pr9_predictions$residuals
product9$arima=pr9_predictions$fitted
pr9_lm = lm(residuals~basket_count, data=product9[event_date<"2021-05-22"])
pr9_lm
##
## Call:
## lm(formula = residuals ~ basket_count, data = product9[event_date <
## "2021-05-22"])
##
## Coefficients:
## (Intercept) basket_count
## -5.77632 0.01562
# Bug fix: 'newdata' (not 'new_data') — the misspelled argument was ignored by
# predict.lm, yielding training-set fitted values rather than full-data predictions.
product9 = cbind(product9, res_pred=predict(pr9_lm, newdata=product9))
# Combined forecast = regression prediction of the residual + ARIMA fitted value.
product9[, fitted:= res_pred+arima]
ggplot(data=product9[event_date<="2021-05-28"&event_date>="2021-05-22",], aes(x=event_date, y=fitted, col="fitted"))+geom_line()+geom_line(data=product9[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=sold_count, col="sold_count"))+geom_line(data=product9[event_date<="2021-05-28"&event_date>="2021-05-22",],aes(x=event_date , y=arima, col="arima"))